Minor changes

This commit is contained in:
codez0mb1e 2022-03-29 08:32:55 +00:00
parent e1cd3ab1b6
commit 0da93e8fa3

View File

@@ -53,7 +53,7 @@ currencies_ds = Dataset.get_by_name(ws, name='Currencies')
currencies_ds.to_pandas_dataframe() currencies_ds.to_pandas_dataframe()
print(f'Dataset name: {currencies_ds.name}. Description: {currencies_ds.description}.') print(f'Dataset name: {currencies_ds.name}. Description: {currencies_ds.description}.')
print(f'Size of Azure ML dataset: {sys.getsizeof(currencies_ds)} bytes.') print(f'Size of Azure ML dataset object: {sys.getsizeof(currencies_ds)} bytes.')
# %% Preprocessing ---- # %% Preprocessing ----
@@ -71,7 +71,7 @@ quotes_df = (currencies_ds
.set_index('date') .set_index('date')
.sort_values(by='date')) .sort_values(by='date'))
quotes_df.head(10) quotes_df
# %% Discover data ---- # %% Discover data ----
@@ -80,35 +80,35 @@ quotes_df.groupby('symbol')['close'].agg(['count', 'last'])
# %% USD/RUB dataset ---- # %% USD/RUB dataset ----
usdrub_df = quotes_df[quotes_df.symbol == 'USD/RUB'] usdrub_df = quotes_df[quotes_df.symbol == 'USD/RUB']
usdrub_df
pd.concat([
usdrub_df['close'].head(5),
usdrub_df['close'].tail(5)
])
# %% Calculate Return ---- # %% Calculate Return ----
def get_returns(close_prices) -> pd.Series: def calc_returns(close_prices: pd.Series) -> pd.Series:
"""Calculate Investment Return"""
return (close_prices/close_prices.shift()) - 1 return (close_prices/close_prices.shift()) - 1
usdrub_df['diff'] = usdrub_df['close'].diff() usdrub_df['diff'] = usdrub_df['close'].diff()
usdrub_df['return'] = get_returns(usdrub_df['close']) usdrub_df['return'] = calc_returns(usdrub_df['close'])
usdrub_df[['close', 'diff', 'return']].tail(10) usdrub_df[['close', 'diff', 'return']].tail(10)
# %% Calculate LogReturn ---- # %% Calculate LogReturn ----
def get_log_returns(return_prices) -> pd.Series: def calc_log_returns(return_prices: pd.Series) -> pd.Series:
"""Calculate Log Return"""
return np.log(1 + return_prices) return np.log(1 + return_prices)
usdrub_df['log_return'] = usdrub_df['return'].apply(lambda x: get_log_returns(x)) usdrub_df['log_return'] = usdrub_df['return'].apply(lambda x: calc_log_returns(x))
usdrub_df[['close', 'diff', 'return', 'log_return']].tail(10) usdrub_df[['close', 'diff', 'return', 'log_return']].tail(10)
# %% Simulate possible LogReturns ---- # %% Simulate possible LogReturns ----
def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> pd.Series: def calc_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> pd.Series:
"""Calculate Simulated Return"""
u = log_returns.mean() u = log_returns.mean()
var = log_returns.var() var = log_returns.var()
stdev = log_returns.std() stdev = log_returns.std()
@@ -119,7 +119,7 @@ def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int
return np.exp(drift + stdev*Z) return np.exp(drift + stdev*Z)
usdrub_simulated_returns = get_simulated_returns( usdrub_simulated_returns = calc_simulated_returns(
usdrub_df['log_return'].dropna(), usdrub_df['log_return'].dropna(),
n_days, n_days,
n_simulations) n_simulations)
@@ -140,9 +140,9 @@ def get_breakeven_prob(pred, risk_free_rate: float = 0.02) -> pd.Series:
""" """
init_pred = pred.iloc[0, 0] init_pred = pred.iloc[0, 0]
pred = pred.iloc[-1]
pred_list = list(pred) last_pred = pred.iloc[-1]
pred_list = list(last_pred)
over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= risk_free_rate] over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= risk_free_rate]
less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < risk_free_rate] less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < risk_free_rate]
@@ -150,7 +150,7 @@ def get_breakeven_prob(pred, risk_free_rate: float = 0.02) -> pd.Series:
return len(over)/(len(over) + len(less)) return len(over)/(len(over) + len(less))
def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float, n_days: int, plot = True) -> pd.DataFrame: def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float, n_days: int, plot: bool = True) -> pd.DataFrame:
""" """
Evaluate Monte-Carlo simulations result Evaluate Monte-Carlo simulations result
""" """
@@ -211,9 +211,9 @@ quotes_data = [quotes_df.query('symbol == @s') for s in quotes_df.symbol.unique(
symbols_list = [df.symbol.unique() for df in quotes_data] symbols_list = [df.symbol.unique() for df in quotes_data]
# 2. simulate # 2. simulate
returns_data = [get_returns(df['close']) for df in quotes_data] returns_data = [calc_returns(df['close']) for df in quotes_data]
log_returns_data = [get_log_returns(r) for r in returns_data] log_returns_data = [calc_log_returns(r) for r in returns_data]
simulated_returns_data = [get_simulated_returns(lr, n_days, n_simulations) for lr in log_returns_data] simulated_returns_data = [calc_simulated_returns(lr, n_days, n_simulations) for lr in log_returns_data]
assert( assert(
len(quotes_data) > 0 len(quotes_data) > 0