#!/usr/bin/python3


"""Crypto Currency Portfolio: Assets Selection.

Description:
    Crypto Currency Selection using Monte Carlo simulation.
"""
# Repository path (from the originating patch):
#   src/cryptocurrency_portfolio__assets_selection.py

# %% Import dependencies ----
# core
import os
import gc

# data science
import pandas as pd
import numpy as np
from scipy.stats import norm

# Cloud integration
from azureml.core import Workspace, Dataset, VERSION as aml_version
print(f'Azure ML SDK v{aml_version}')

# network
import ssl
# SECURITY(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. TLS certificate verification is switched off for
# HTTPS requests that rely on the default context in this process. Kept
# because the download below presumably depended on it -- confirm whether
# it is still required and remove it if not.
ssl._create_default_https_context = ssl._create_unverified_context

# plots
import matplotlib.pyplot as plt
import seaborn as sns

# show info about python env
from IPython import sys_info
print(sys_info())

import warnings
# Silences every warning raised after this point (including deprecations).
warnings.filterwarnings("ignore")


# %% Set params ----
symbols = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT', 'MATICUSDT', 'UNIUSDT']

# US market has 252 trading days in a year
# NOTE(review): 252 is an equity-market convention; confirm it is the
# intended horizon for the assets listed above.
n_days = 252
n_iterations = 10_000


# %% Load quotes ----
def get_quotes(symbol: str) -> pd.DataFrame:
    """Download the daily Binance quotes for ``symbol``.

    Args:
        symbol: Pair name inserted into the download URL, e.g. ``'BTCUSDT'``.

    Returns:
        DataFrame indexed by the ``date`` column, sorted ascending by that
        index, holding the ``symbol``, ``open``, ``high``, ``low`` and
        ``close`` columns.

    Raises:
        KeyError: If the downloaded file lacks one of the expected columns.
    """
    # The first physical line of the file is skipped; presumably it is a
    # banner that precedes the CSV header -- TODO confirm.
    df = pd.read_csv(
        f'https://www.cryptodatadownload.com/cdd/Binance_{symbol}_d.csv',
        skiprows=[0],
    )

    # 'date' becomes the index, so sort on the index directly.
    # NOTE(review): the values are not parsed as datetimes here, so the
    # ordering is whatever pandas gives for the raw column values.
    df = df.set_index('date').sort_index()

    return df[['symbol', 'open', 'high', 'low', 'close']]


quotes_data = [get_quotes(s) for s in symbols]
# row-wise union:
#   pd.concat([get_quotes(s) for s in symbols])
# column-wise:
#   pd.concat(list1, axis=1, ignore_index=False)


btcusdt_df = quotes_data[0]

# Interactive preview: first and last five closing prices.
pd.concat([
    btcusdt_df['close'].head(5),
    btcusdt_df['close'].tail(5),
])


# %% Calculate Return
def get_returns(close_prices: pd.Series) -> pd.Series:
    """Return the simple period-over-period return of ``close_prices``.

    Each element is ``price[t] / price[t-1] - 1``; the first element is NaN
    because it has no predecessor.
    """
    return close_prices / close_prices.shift(1) - 1


btcusdt_df['diff'] = btcusdt_df['close'].diff()
btcusdt_df['return'] = get_returns(btcusdt_df['close'])

btcusdt_df[['close', 'diff', 'return']].tail(10)


# %% Calculate LogReturn
def get_log_returns(return_prices):
    """Convert simple returns into log returns, ``log(1 + r)``.

    Works element-wise on a ``pd.Series`` / array and also on a scalar.
    ``np.log1p`` is used because it stays accurate for returns close to 0.
    """
    return np.log1p(return_prices)


# Vectorised call; no need to apply the function element by element.
btcusdt_df['log_return'] = get_log_returns(btcusdt_df['return'])
btcusdt_df[['close', 'diff', 'return', 'log_return']].tail(10)


# %% Simulate possible LogReturns
def get_simulated_returns(log_returns: pd.Series, n_days: int, n_iterations: int) -> np.ndarray:
    """Draw random daily growth factors from the historical log returns.

    Args:
        log_returns: Historical log returns; missing values are ignored.
        n_days: Number of rows (time steps) to simulate.
        n_iterations: Number of columns (independent paths) to simulate.

    Returns:
        Array of shape ``(n_days, n_iterations)`` whose entries are
        ``exp(drift + stdev * Z)`` with ``Z`` standard normal and
        ``drift = mean - 0.5 * variance`` of the observed log returns.

    Raises:
        ValueError: If fewer than two non-missing log returns are supplied
            (the sample variance would be undefined).
    """
    observed = log_returns.dropna()
    if len(observed) < 2:
        raise ValueError("at least two non-missing log returns are required")

    drift = observed.mean() - 0.5 * observed.var()
    stdev = observed.std()

    # Sample the normal shocks directly. Feeding uniform [0, 1) draws through
    # norm.ppf can yield -inf when a draw is exactly 0, which would pin a
    # simulated price to 0.
    shocks = np.random.standard_normal((n_days, n_iterations))

    return np.exp(drift + stdev * shocks)


btcusd_logreturns = btcusdt_df['log_return'].dropna()
btcusd_simulated_returns = get_simulated_returns(btcusd_logreturns, n_days, n_iterations)

assert btcusd_simulated_returns.shape == (n_days, n_iterations)


# %% Monte carlo simulation functions ----
def get_breakeven_prob(predicted, threshold=0):
    """Return the share of simulated paths that end at or above a threshold.

    Args:
        predicted: DataFrame of simulated prices, one row per time step and
            one column per path. The starting price is read from the
            top-left cell.
        threshold: Minimum percentage change (final vs. starting price,
            in percent) for a path to count as a success.

    Returns:
        Fraction of paths, in ``[0, 1]``, whose percentage change is
        ``>= threshold``. Paths whose final value is NaN are left out.

    Raises:
        ValueError: If no path has a usable final price.
    """
    start_price = predicted.iloc[0, 0]
    final_prices = predicted.iloc[-1]

    pct_change = ((final_prices - start_price) * 100 / start_price).dropna()
    if pct_change.empty:
        raise ValueError("no simulated paths with a valid final price")

    return float((pct_change >= threshold).mean())


def monte_carlo_simulation(simulated_returns: np.ndarray, last_actual_price: float, n_days: int, plot=True) -> pd.DataFrame:
    """Build simulated price paths and report summary statistics.

    Args:
        simulated_returns: 2-D array of daily growth factors, shape
            ``(rows, n_paths)`` with ``rows >= n_days``. Row 0 is not used
            as a growth factor; it is replaced by the starting price.
        last_actual_price: Starting price of every path. A scalar, or a
            sequence / Series whose last element is used.
        n_days: Number of rows (time steps) in the resulting paths.
        plot: When true, draw the distribution and the cumulative
            distribution of the final prices.

    Returns:
        DataFrame of shape ``(n_days, n_paths)`` with the simulated prices.

    Raises:
        ValueError: If ``simulated_returns`` is not 2-D, ``n_days`` is out
            of range, or ``last_actual_price`` is empty.
    """
    returns = np.asarray(simulated_returns, dtype=float)
    if returns.ndim != 2:
        raise ValueError("simulated_returns must be a 2-D array")
    if not 1 <= n_days <= returns.shape[0]:
        raise ValueError(
            f"n_days must be between 1 and {returns.shape[0]}, got {n_days}"
        )

    # Callers may hand over a one-element Series (e.g. ``.tail(1)``);
    # reduce it to a plain float.
    price_values = np.asarray(last_actual_price, dtype=float).reshape(-1)
    if price_values.size == 0:
        raise ValueError("last_actual_price must not be empty")
    start_price = float(price_values[-1])

    # Row 0 holds the starting price; every later row is the previous row
    # times that day's growth factor, which is a cumulative product.
    steps = returns[:n_days].copy()
    steps[0] = start_price
    prices = pd.DataFrame(np.cumprod(steps, axis=0))

    final_prices = prices.iloc[-1]

    # Plot
    if plot:
        # NOTE: sns.distplot is deprecated in recent seaborn releases; it is
        # kept so the rendered figures stay the same.
        _, axes = plt.subplots(1, 2, figsize=(14, 4))
        sns.distplot(final_prices, ax=axes[0])
        sns.distplot(final_prices, hist_kws={'cumulative': True}, kde_kws={'cumulative': True}, ax=axes[1])
        for axis in axes:
            axis.set_xlabel('Stock Price')
        plt.show()

    expected_value = float(final_prices.mean())
    # Return is measured against the starting price.
    expected_return = 100 * (expected_value - start_price) / start_price

    print(f"Investment period: {n_days-1}")
    print(f"Expected Value: ${round(expected_value, 2)}")
    print(f"Return: {round(expected_return, 2)}%")
    print(f"Probability of Breakeven: {get_breakeven_prob(prices)}")

    return prices


# %% Run Monte carlo simulation and estimate result

simulated_prices_df = monte_carlo_simulation(
    btcusd_simulated_returns,
    quotes_data[0]['close'].iloc[-1],
    n_days)


plt.figure(figsize=(10, 6))
plt.plot(simulated_prices_df.iloc[:, 1:10])
plt.show()


# %% Monte Carlo simulation pipeline for multiple tokens ----

n_iterations = int(1e4)  #! WARN: set simulations number

returns_data = [get_returns(df['close']) for df in quotes_data]
log_returns_data = [get_log_returns(r) for r in returns_data]
simulated_returns_data = [get_simulated_returns(lr, n_days, n_iterations) for lr in log_returns_data]


for symbol, quotes, simulated in zip(symbols, quotes_data, simulated_returns_data):
    print(f'Starting Monte-Carlo simulation for {symbol} symbol...')
    prices_ms = monte_carlo_simulation(simulated, quotes['close'].iloc[-1], n_days, plot=True)

    plt.figure(figsize=(10, 6))
    plt.plot(prices_ms.iloc[:, 1:50])
    plt.show()

# NOTE: the originating patch also records a pure rename (similarity 100%):
#   src/currency_portf.py -> src/fx_currency_portfolio__assets_selection.py