From e1cd3ab1b6c0cac51b6d03c17e74ac8175091977 Mon Sep 17 00:00:00 2001
From: codez0mb1e <d.petukhov@outlook.com>
Date: Sun, 27 Mar 2022 13:26:37 +0000
Subject: [PATCH] Release script (beta)

---
 src/currency_portf.py | 119 ++++++++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 50 deletions(-)

diff --git a/src/currency_portf.py b/src/currency_portf.py
index ba15bcc..400e093 100644
--- a/src/currency_portf.py
+++ b/src/currency_portf.py
@@ -10,21 +10,20 @@ Description:
 # core
 import sys
 import warnings
+from IPython import sys_info
 
 # data science
 import pandas as pd
 import numpy as np
 from scipy.stats import norm
-from IPython import sys_info
 
+# Cloud integration
+from azureml.core import Workspace, Dataset, ComputeTarget, VERSION as aml_version
+print(f'Azure ML SDK v{aml_version}')
 # plots
 import matplotlib.pyplot as plt
 import seaborn as sns
 
-# Cloud integration
-from azureml.core import Workspace, Dataset, VERSION as aml_version
-print(f'Azure ML SDK v{aml_version}')
-
 # show info about python env
 print(sys_info())
 warnings.filterwarnings("ignore")
@@ -33,21 +32,24 @@ warnings.filterwarnings("ignore")
 # %% Set params ----
 symbols = ['USD/CHF', 'USD/CNY', 'USD/EUR', 'USD/GBP', 'USD/HKD', 'USD/JPY', 'USD/KZT', 'USD/RUB']
 
-n_days = int(252) # US market has 252 trading days in a year
-n_iterations = int(1e4)
+n_days = int(252)  # US market has 252 trading days in a year
+n_simulations = int(1e4)
 
 
-# %% Connect to Azure ML workspace
-subscription_id = '9aef4ce1-e591-4870-9443-0b0eb98df2aa'
-resource_group = 'ai-bootcamp-rg'
-workspace_name = 'portf-opt-ws'
+# %% Connect to Azure ML workspace ----
+ws =  Workspace.from_config()
+print(f"Connected to *{ws.get_details()['friendlyName']}* workspace in *{ws.get_details()['location']}*.")
 
-workspace = Workspace(subscription_id, resource_group, workspace_name) # Workspace.from_config()
-print(f"Connected to *{workspace.get_details()['friendlyName']}* workspace in *{workspace.get_details()['location']}*.")
+print('Compute Targets:')
+for compute_name in ws.compute_targets:
+    compute = ws.compute_targets[compute_name]
+    print('\t', compute.name, ':', compute.type)
+
+# > htop
 
 
-# %%
-currencies_ds = Dataset.get_by_name(workspace, name='Currencies')
+# %% Load dateset ----
+currencies_ds = Dataset.get_by_name(ws, name='Currencies')
 currencies_ds.to_pandas_dataframe()
 
 print(f'Dataset name: {currencies_ds.name}. Description: {currencies_ds.description}.')
@@ -85,7 +87,7 @@ pd.concat([
 ])
 
 
-# %% Calculate Return
+# %% Calculate Return ----
 def get_returns(close_prices) -> pd.Series:
     return (close_prices/close_prices.shift()) - 1
 
@@ -96,8 +98,7 @@ usdrub_df['return'] = get_returns(usdrub_df['close'])
 usdrub_df[['close', 'diff', 'return']].tail(10)
 
 
-
-# %% Calculate LogReturn
+# %% Calculate LogReturn ----
 def get_log_returns(return_prices) -> pd.Series:
     return np.log(1 + return_prices)
 
@@ -105,7 +106,7 @@ usdrub_df['log_return'] = usdrub_df['return'].apply(lambda x: get_log_returns(x)
 usdrub_df[['close', 'diff', 'return', 'log_return']].tail(10)
 
 
-# %% Simulate possible LogReturns
+# %% Simulate possible LogReturns ----
 
 def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> pd.Series:
     u = log_returns.mean()
@@ -114,17 +115,17 @@ def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int
 
     drift = u - (0.5*var)
     Z = norm.ppf(np.random.rand(n_days, n_iterations))
-    
+
     return np.exp(drift + stdev*Z)
 
 
 usdrub_simulated_returns = get_simulated_returns(
-    usdrub_df['log_return'].dropna(), 
-    n_days, 
-    n_iterations)
+    usdrub_df['log_return'].dropna(),
+    n_days,
+    n_simulations)
 
 assert(
-    usdrub_simulated_returns.shape == (n_days, n_iterations)
+    usdrub_simulated_returns.shape == (n_days, n_simulations)
     and (usdrub_simulated_returns > 0).all()
     and (usdrub_simulated_returns < 2).all()
 )
@@ -132,10 +133,10 @@ assert(
 
 # %% Monte carlo simulation evaluation ----
 
-def get_breakeven_prob(pred, threshold: float = 0.) -> pd.Series:
+def get_breakeven_prob(pred, risk_free_rate: float = 0.02) -> pd.Series:
     """
-    Calculation of the probability of a stock being above a certain threshold, 
-    which can be defined as a value (final stock price) or return rate (percentage change)
+    Calculation of the probability of a stock being above a certain threshold,
+    which can be defined as a value (final stock price) or return rate (percentage change).
     """
 
     init_pred = pred.iloc[0, 0]
@@ -143,21 +144,21 @@ def get_breakeven_prob(pred, threshold: float = 0.) -> pd.Series:
 
     pred_list = list(pred)
 
-    over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= threshold]
-    less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < threshold]
+    over = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred >= risk_free_rate]
+    less = [(p*100)/init_pred for p in pred_list if ((p-init_pred)*100)/init_pred < risk_free_rate]
 
     return len(over)/(len(over) + len(less))
 
 
 def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float, n_days: int, plot = True) -> pd.DataFrame:
-    """ 
-
+    """
+    Evaluate Monte-Carlo simulations result
     """
 
     # Create empty matrix
     price_list = np.zeros_like(simulated_returns)
 
-    # Put the last actual price in the first row 
+    # Put the last actual price in the first row,
     # and calculate the price of each day
     price_list[0] = last_actual_price
     for t in range(1, n_days):
@@ -169,52 +170,70 @@ def evaluate_simulation(simulated_returns: pd.Series, last_actual_price: float,
     # Plot
     if plot == True:
         x = price_df.iloc[-1]
-        fig, ax = plt.subplots(1, 2, figsize=(14,4))
+        fig, ax = plt.subplots(1, 2, figsize=(14, 4))
         sns.distplot(x, ax=ax[0])
         sns.distplot(x, hist_kws={'cumulative': True}, kde_kws={'cumulative': True}, ax=ax[1])
         plt.xlabel('Stock Price')
         plt.show()
 
-    print(f'Investment period: {n_days-1} days')
-    print(f'Expected Value: {round(price_df.iloc[-1].mean(), 2)} per USD')
-    print(f'Return: {round(100*(price_df.iloc[-1].mean() - price_list[0,1]) /price_df.iloc[-1].mean(), 2)}%')
-    print(f'Probability of Breakeven: {get_breakeven_prob(price_df)}')
+    print('Results:')
+    print(f'\tInvestment period: {n_days-1} days')
+    print(f'\tExpected Value: {round(price_df.iloc[-1].mean(), 2)} per USD')
+    print(f'\tReturn: {round(100*(price_df.iloc[-1].mean() - price_list[0,1])/price_df.iloc[-1].mean(), 2)}%')
+    print(f'\tProbability of Breakeven: {get_breakeven_prob(price_df)}')
 
     return price_df
 
 
-# %% Run Monte carlo simulation and estimate result
+# %% Run Monte carlo simulation and estimate result ----
 
 usdrub_mc_simulation_df = evaluate_simulation(
-    usdrub_simulated_returns, 
+    usdrub_simulated_returns,
     last_actual_price = usdrub_df['close'].tail(1),
     n_days = n_days)
 
 
-plt.figure(figsize=(10,6))
-plt.plot(usdrub_mc_simulation_df.sample(20, axis='columns'))
+plt.figure(figsize=(10, 6))
+plt.plot(usdrub_mc_simulation_df.sample(10, axis='columns'))
+plt.title('USD/RUB Price Simulation')
+plt.xlabel('Days')
+plt.ylabel('RUB per $1')
+plt.ylim(10, 300)
 plt.show()
 
 
-
 # %% Monte Carlo simulation pipeline for multiple tokens ----
+# 0. set simulation params
+n_simulations = int(1e4)
 
 # 1. prepare
-n_iterations = int(1e4) #! WARN: set simulations number
 quotes_data = [quotes_df.query('symbol == @s') for s in quotes_df.symbol.unique()]
+symbols_list = [df.symbol.unique() for df in quotes_data]
 
 # 2. simulate
 returns_data = [get_returns(df['close']) for df in quotes_data]
 log_returns_data = [get_log_returns(r) for r in returns_data]
-simulated_returns_data = [get_simulated_returns(lr, n_days, n_iterations) for lr in log_returns_data]
+simulated_returns_data = [get_simulated_returns(lr, n_days, n_simulations) for lr in log_returns_data]
+
+assert(
+    len(quotes_data) > 0
+    and len(quotes_data) == len(symbols_list) == len(returns_data) == len(log_returns_data) == len(simulated_returns_data)
+)
 
 # 3. evaluate
-for i in range(len(simulated_returns_data)):
-    print(f'---- Starting Monte-Carlo simulation for {symbols[i]} symbol... ----')
-    prices_ms = evaluate_simulation(simulated_returns_data[i], quotes_data[i]['close'].tail(1), n_days, plot=True)
+for i in range(len(symbols_list)):
+    print(f'---- Starting Monte-Carlo simulation for {symbols_list[i]} symbol ----')
+
     
-    plt.figure(figsize=(10,6))
-    plt.plot(prices_ms.iloc[:, 1:50])
+    prices_ms = evaluate_simulation(simulated_returns_data[i], quotes_data[i]['close'].tail(1), n_days, plot=False)
+    
+    plt.figure(figsize=(10, 6))
+    plt.plot(prices_ms.sample(100, axis='columns'))
+    plt.title(f'{symbols_list[i]} Price Simulation')
+    plt.xlabel('Days')
+    plt.ylabel('Amount per $1')
     plt.show()
 
-# %%
+
+# %% Completed ----
+gc()