Release script (beta)

This commit is contained in:
codez0mb1e 2022-03-27 13:26:37 +00:00
parent 225a958899
commit e1cd3ab1b6

View File

@ -10,21 +10,20 @@ Description:
# core # core
import sys import sys
import warnings import warnings
from IPython import sys_info
# data science # data science
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from scipy.stats import norm from scipy.stats import norm
from IPython import sys_info
# Cloud integration
from azureml.core import Workspace, Dataset, ComputeTarget, VERSION as aml_version
print(f'Azure ML SDK v{aml_version}')
# plots # plots
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
# Cloud integration
from azureml.core import Workspace, Dataset, VERSION as aml_version
print(f'Azure ML SDK v{aml_version}')
# show info about python env # show info about python env
print(sys_info()) print(sys_info())
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
@ -34,20 +33,23 @@ warnings.filterwarnings("ignore")
symbols = ['USD/CHF', 'USD/CNY', 'USD/EUR', 'USD/GBP', 'USD/HKD', 'USD/JPY', 'USD/KZT', 'USD/RUB'] symbols = ['USD/CHF', 'USD/CNY', 'USD/EUR', 'USD/GBP', 'USD/HKD', 'USD/JPY', 'USD/KZT', 'USD/RUB']
n_days = int(252) # US market has 252 trading days in a year n_days = int(252) # US market has 252 trading days in a year
n_iterations = int(1e4) n_simulations = int(1e4)
# %% Connect to Azure ML workspace # %% Connect to Azure ML workspace ----
subscription_id = '9aef4ce1-e591-4870-9443-0b0eb98df2aa' ws = Workspace.from_config()
resource_group = 'ai-bootcamp-rg' print(f"Connected to *{ws.get_details()['friendlyName']}* workspace in *{ws.get_details()['location']}*.")
workspace_name = 'portf-opt-ws'
workspace = Workspace(subscription_id, resource_group, workspace_name) # Workspace.from_config() print('Compute Targets:')
print(f"Connected to *{workspace.get_details()['friendlyName']}* workspace in *{workspace.get_details()['location']}*.") for compute_name in ws.compute_targets:
compute = ws.compute_targets[compute_name]
print('\t', compute.name, ':', compute.type)
# > htop
# %% # %% Load dataset ----
currencies_ds = Dataset.get_by_name(workspace, name='Currencies') currencies_ds = Dataset.get_by_name(ws, name='Currencies')
currencies_ds.to_pandas_dataframe() currencies_ds.to_pandas_dataframe()
print(f'Dataset name: {currencies_ds.name}. Description: {currencies_ds.description}.') print(f'Dataset name: {currencies_ds.name}. Description: {currencies_ds.description}.')
@ -85,7 +87,7 @@ pd.concat([
]) ])
# %% Calculate Return # %% Calculate Return ----
def get_returns(close_prices) -> pd.Series: def get_returns(close_prices) -> pd.Series:
return (close_prices/close_prices.shift()) - 1 return (close_prices/close_prices.shift()) - 1
@ -96,8 +98,7 @@ usdrub_df['return'] = get_returns(usdrub_df['close'])
usdrub_df[['close', 'diff', 'return']].tail(10) usdrub_df[['close', 'diff', 'return']].tail(10)
# %% Calculate LogReturn ----
# %% Calculate LogReturn
def get_log_returns(return_prices) -> pd.Series: def get_log_returns(return_prices) -> pd.Series:
return np.log(1 + return_prices) return np.log(1 + return_prices)
@ -105,7 +106,7 @@ usdrub_df['log_return'] = usdrub_df['return'].apply(lambda x: get_log_returns(x)
usdrub_df[['close', 'diff', 'return', 'log_return']].tail(10) usdrub_df[['close', 'diff', 'return', 'log_return']].tail(10)
# %% Simulate possible LogReturns # %% Simulate possible LogReturns ----
def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> pd.Series: def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> pd.Series:
u = log_returns.mean() u = log_returns.mean()
@ -121,10 +122,10 @@ def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int
usdrub_simulated_returns = get_simulated_returns( usdrub_simulated_returns = get_simulated_returns(
usdrub_df['log_return'].dropna(), usdrub_df['log_return'].dropna(),
n_days, n_days,
n_iterations) n_simulations)
assert( assert(
usdrub_simulated_returns.shape == (n_days, n_iterations) usdrub_simulated_returns.shape == (n_days, n_simulations)
and (usdrub_simulated_returns > 0).all() and (usdrub_simulated_returns > 0).all()
and (usdrub_simulated_returns < 2).all() and (usdrub_simulated_returns < 2).all()
) )
@ -132,10 +133,10 @@ assert(
# %% Monte Carlo simulation evaluation ---- # %% Monte Carlo simulation evaluation ----
def get_breakeven_prob(pred, threshold: float = 0.) -> pd.Series: def get_breakeven_prob(pred, risk_free_rate: float = 0.02) -> pd.Series:
""" """
Calculation of the probability of a stock being above a certain threshold, Calculation of the probability of a stock being above a certain threshold,
which can be defined as a value (final stock price) or return rate (percentage change) which can be defined as a value (final stock price) or return rate (percentage change).
""" """
init_pred = pred.iloc[0, 0] init_pred = pred.iloc[0, 0]
@ -143,21 +144,21 @@ def get_breakeven_prob(pred, threshold: float = 0.) -> pd.Series:
pred_list = list(pred) pred_list = list(pred)
over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= threshold] over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= risk_free_rate]
less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < threshold] less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < risk_free_rate]
return len(over)/(len(over) + len(less)) return len(over)/(len(over) + len(less))
def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float, n_days: int, plot = True) -> pd.DataFrame: def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float, n_days: int, plot = True) -> pd.DataFrame:
""" """
Evaluate Monte-Carlo simulations result
""" """
# Create empty matrix # Create empty matrix
price_list = np.zeros_like(simulated_returns) price_list = np.zeros_like(simulated_returns)
# Put the last actual price in the first row # Put the last actual price in the first row,
# and calculate the price of each day # and calculate the price of each day
price_list[0] = last_actual_price price_list[0] = last_actual_price
for t in range(1, n_days): for t in range(1, n_days):
@ -175,15 +176,16 @@ def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float,
plt.xlabel('Stock Price') plt.xlabel('Stock Price')
plt.show() plt.show()
print(f'Investment period: {n_days-1} days') print('Results:')
print(f'Expected Value: {round(price_df.iloc[-1].mean(), 2)} per USD') print(f'\tInvestment period: {n_days-1} days')
print(f'Return: {round(100*(price_df.iloc[-1].mean() - price_list[0,1]) /price_df.iloc[-1].mean(), 2)}%') print(f'\tExpected Value: {round(price_df.iloc[-1].mean(), 2)} per USD')
print(f'Probability of Breakeven: {get_breakeven_prob(price_df)}') print(f'\tReturn: {round(100*(price_df.iloc[-1].mean() - price_list[0,1])/price_df.iloc[-1].mean(), 2)}%')
print(f'\tProbability of Breakeven: {get_breakeven_prob(price_df)}')
return price_df return price_df
# %% Run Monte Carlo simulation and estimate result # %% Run Monte Carlo simulation and estimate result ----
usdrub_mc_simulation_df = evaluate_simulation( usdrub_mc_simulation_df = evaluate_simulation(
usdrub_simulated_returns, usdrub_simulated_returns,
@ -192,29 +194,46 @@ usdrub_mc_simulation_df = evaluate_simulation(
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
plt.plot(usdrub_mc_simulation_df.sample(20, axis='columns')) plt.plot(usdrub_mc_simulation_df.sample(10, axis='columns'))
plt.title('USD/RUB Price Simulation')
plt.xlabel('Days')
plt.ylabel('RUB per $1')
plt.ylim(10, 300)
plt.show() plt.show()
# %% Monte Carlo simulation pipeline for multiple tokens ---- # %% Monte Carlo simulation pipeline for multiple tokens ----
# 0. set simulation params
n_simulations = int(1e4)
# 1. prepare # 1. prepare
n_iterations = int(1e4) #! WARN: set simulations number
quotes_data = [quotes_df.query('symbol == @s') for s in quotes_df.symbol.unique()] quotes_data = [quotes_df.query('symbol == @s') for s in quotes_df.symbol.unique()]
symbols_list = [df.symbol.unique() for df in quotes_data]
# 2. simulate # 2. simulate
returns_data = [get_returns(df['close']) for df in quotes_data] returns_data = [get_returns(df['close']) for df in quotes_data]
log_returns_data = [get_log_returns(r) for r in returns_data] log_returns_data = [get_log_returns(r) for r in returns_data]
simulated_returns_data = [get_simulated_returns(lr, n_days, n_iterations) for lr in log_returns_data] simulated_returns_data = [get_simulated_returns(lr, n_days, n_simulations) for lr in log_returns_data]
assert(
len(quotes_data) > 0
and len(quotes_data) == len(symbols_list) == len(returns_data) == len(log_returns_data) == len(simulated_returns_data)
)
# 3. evaluate # 3. evaluate
for i in range(len(simulated_returns_data)): for i in range(len(symbols_list)):
print(f'---- Starting Monte-Carlo simulation for {symbols[i]} symbol... ----') print(f'---- Starting Monte-Carlo simulation for {symbols_list[i]} symbol ----')
prices_ms = evaluate_simulation(simulated_returns_data[i], quotes_data[i]['close'].tail(1), n_days, plot=True)
prices_ms = evaluate_simulation(simulated_returns_data[i], quotes_data[i]['close'].tail(1), n_days, plot=False)
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
plt.plot(prices_ms.iloc[:, 1:50]) plt.plot(prices_ms.sample(100, axis='columns'))
plt.title(f'{symbols_list[i]} Price Simulation')
plt.xlabel('Days')
plt.ylabel('Amount per $1')
plt.show() plt.show()
# %%
# %% Completed ----
gc()