From e9141ec9ce136d2df71708aec906571abc098afe Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Thu, 21 Apr 2022 16:55:17 +0000 Subject: [PATCH 01/17] Init commit --- .gitignore | 1 + src/azure.py | 64 +++++++++++++++++++++++ src/bitfinex_crypto_parser.py | 98 +++++++++++++++++++++++++++++++++++ src/openfigi_parser.py | 75 +++++++++++++++++++++++++++ 4 files changed, 238 insertions(+) create mode 100644 .gitignore create mode 100644 src/azure.py create mode 100644 src/bitfinex_crypto_parser.py create mode 100644 src/openfigi_parser.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..932f02d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/data/*.csv \ No newline at end of file diff --git a/src/azure.py b/src/azure.py new file mode 100644 index 0000000..2e55dc3 --- /dev/null +++ b/src/azure.py @@ -0,0 +1,64 @@ + +# %% Import dependencies ---- +from dataclasses import dataclass +from typing import Dict, Any, Iterable +from pandas import DataFrame +from sqlalchemy import create_engine, inspect +import urllib + + +# %% +@dataclass(frozen=True) +class ConnectionSettings: + """Connection Settings.""" + server: str + database: str + username: str + password: str + driver: str = '{ODBC Driver 17 for SQL Server}' + timeout: int = 30 + + +class AzureDbConnection: + """ + Azure SQL database connection. 
+ """ + def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None: + conn_params = urllib.parse.quote_plus( + 'Driver=%s;' % conn_settings.driver + + 'Server=tcp:%s,1433;' % conn_settings.server + + 'Database=%s;' % conn_settings.database + + 'Uid=%s;' % conn_settings.username + + 'Pwd={%s};' % conn_settings.password + + 'Encrypt=yes;' + + 'TrustServerCertificate=no;' + + 'Connection Timeout=%s;' % conn_settings.timeout + ) + conn_string = f'mssql+pyodbc:///?odbc_connect={conn_params}' + + self.db = create_engine(conn_string, echo=echo) + + def connect(self) -> None: + """Estimate connection.""" + self.conn = self.db.connect() + + def get_tables(self) -> Iterable[str]: + """Get list of tables.""" + inspector = inspect(self.db) + return [t for t in inspector.get_table_names()] + + def insert(self, inserted_data: DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None: + inserted_data.to_sql( + con=self.db, + schema='dbo', + name=target_table, + if_exists='append', # or replace + index=False, + chunksize=chunksize, + dtype=db_mapping + ) + + def dispose(self) -> None: + """Dispose opened connections.""" + self.conn.close() + self.db.dispose() diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py new file mode 100644 index 0000000..ffcadb0 --- /dev/null +++ b/src/bitfinex_crypto_parser.py @@ -0,0 +1,98 @@ +#!/usr/bin/python3 + +""" + +Data source: https://www.kaggle.com/code/tencars/bitfinexdataset +""" + +# %% +import os +import numpy as np +import pandas as pd +from sqlalchemy import types + +from azure import AzureDbConnection, ConnectionSettings + + +# %% +input_path = "../data" + +# Get names and number of available currency pairs +pair_names = [x[:-4] for x in os.listdir(input_path)] +n_pairs = len(pair_names) + +# Print the first 50 currency pair names +print("These are the first 50 out of {} currency pairs in the dataset:".format(n_pairs)) +print(pair_names[0:50]) + +usd_pairs = [s 
for s in pair_names if "usd" in s] +print(usd_pairs) + +# %% + +def load_data(symbol, source=input_path): + path_name = source + "/" + symbol + ".csv" + + # Load data + df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64}) + df.index = pd.to_datetime(df.index, unit='ms') + df = df[~df.index.duplicated(keep='first')] + + # As mentioned in the description, bins without any change are not recorded. + # We have to fill these gaps by filling them with the last value until a change occurs. + #df = df.resample('1T').pad() + + return df[['open', 'high', 'low', 'close', 'volume']] + + +# %% ---- +solusd = load_data("solusd") +solusd.tail() + + +# %% ---- +conn_settings = ... +db_conn = AzureDbConnection(conn_settings) + +db_conn.connect() +for t in db_conn.get_tables(): + print(t) + + +# %% +min_candels_n = 10000 + +db_mapping = { + 'FIGI': types.VARCHAR(length=12), + 'open': types.DECIMAL(precision=19, scale=9), + 'high': types.DECIMAL(precision=19, scale=9), + 'close': types.DECIMAL(precision=19, scale=9), + 'low': types.DECIMAL(precision=19, scale=9), + 'volume': types.DECIMAL(precision=19, scale=9), + 'time': types.DATETIME(), + 'source_id': types.SMALLINT, + 'version': types.VARCHAR(length=12), + 'interval': types.CHAR(length=2) +} + +for pair in usd_pairs: + print(f'Starting read {pair}...') + candles_df = load_data(pair) + + candles_df['FIGI'] = pair + candles_df['time'] = candles_df.index + candles_df['source_id'] = 128 + candles_df['version'] = 'v202204' + candles_df['interval'] = '1M' + + if candles_df.shape[0] > min_candels_n: + print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time']))) + + print(f'Starting insert {pair}...') + db_conn.insert(candles_df, 'Cryptocurrency', db_mapping) + else: + print(f'WARN: {pair} has only {candles_df.shape[0]} records') + + +# %% +db_conn.dispose() diff --git 
a/src/openfigi_parser.py b/src/openfigi_parser.py new file mode 100644 index 0000000..e689185 --- /dev/null +++ b/src/openfigi_parser.py @@ -0,0 +1,75 @@ +# %% +from dataclasses import dataclass +from typing import Optional +import pandas as pd +import httpx + + +# %% +@dataclass +class AssetInfo: + FIGI: str + Ticker: str + Title: str + Description: Optional[str] + AssetType: str = 'Cryptocurrency' + SourceId: str = "OpenFigi API" + Version: str = "v202204" + + +def get_asset_info(pair: str) -> AssetInfo: + api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString={pair}&start=0' + response = httpx.get(api_url) + + json_response = response.json() + response_df = pd.DataFrame.from_dict(json_response['result'], orient='columns') + if len(response_df) == 0: + print(f'[WARN] {pair} not found') + return None + + pair_figi = response_df.kkg_pairFIGI_sd.unique() + + if (len(pair_figi) != 1): + print(f'[WARN] {len(pair_figi)} records was found for {pair}') + else: + print(f'[INFO] {pair} associated w/ FIGI {pair_figi[0]}') + + return pair_figi + + +#%% Tests +expected_pairs = { + 'WAX-USD': None, + 'ETH-USD': 'BBG00J3NBWD7', + 'BTC-USD': 'BBG006FCL7J4', + 'SOL-USD': 'BBG013WVY457', + 'UNI-USD': 'BBG013TZFVW3' +} + +for k, v in expected_pairs.items(): + assert get_asset_info(k) == v + + +# %% +import os +import pandas as pd + +pair_names = [x[:-4] for x in os.listdir("../data")] + +def insert_dash(text: str, position: int) -> str: + if '-' not in text: + return text[:position] + '-' + text[position:] + else: + return text + +usd_pairs = [insert_dash(s.upper(), 3) for s in pair_names if "usd" in s] + +print(usd_pairs) + +# %% +pair_figi_list = [get_asset_info(p) for p in usd_pairs] + +for p in usd_pairs: + print(p) + get_asset_info(p) +# %% From 03dc615a8c8d591a94389c4ab2740d39fbaf65a9 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Tue, 7 Jun 2022 14:31:52 +0000 Subject: [PATCH 02/17] Format connection 
string --- src/bitfinex_crypto_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py index ffcadb0..c0c03e1 100644 --- a/src/bitfinex_crypto_parser.py +++ b/src/bitfinex_crypto_parser.py @@ -51,7 +51,7 @@ solusd.tail() # %% ---- -conn_settings = ... +conn_settings = ConnectionSettings(server='***.database.windows.net', database='market-data-db', username='demo', password='***') db_conn = AzureDbConnection(conn_settings) db_conn.connect() From e1f86b7714a1cf8ba12784a55d42052f2e78d876 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Fri, 17 Jun 2022 16:51:48 +0000 Subject: [PATCH 03/17] Add .zip to except list --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 932f02d..1e53675 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -/data/*.csv \ No newline at end of file +/data/*.csv +/data/*.zip From 0d5e9611239bc8240a622d67eeec5a26d1a5da9f Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Fri, 17 Jun 2022 17:47:32 +0000 Subject: [PATCH 04/17] Update FIGI parser --- src/openfigi_parser.py | 80 ++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 19 deletions(-) diff --git a/src/openfigi_parser.py b/src/openfigi_parser.py index e689185..838dc85 100644 --- a/src/openfigi_parser.py +++ b/src/openfigi_parser.py @@ -1,23 +1,35 @@ # %% +import os from dataclasses import dataclass -from typing import Optional +from typing import Literal, Union + import pandas as pd + import httpx +from sqlalchemy import types, sql +from azure import AzureDbConnection, ConnectionSettings + # %% @dataclass class AssetInfo: FIGI: str Ticker: str - Title: str - Description: Optional[str] - AssetType: str = 'Cryptocurrency' - SourceId: str = "OpenFigi API" - Version: str = "v202204" + Title: Union[str, None] + Description: Union[str, None] + AssetType: Literal['Cryptocurrency'] + SourceId: Literal['OpenFigi API'] + Version: 
Literal['v202206'] -def get_asset_info(pair: str) -> AssetInfo: +def get_figi(pair: str) -> Union[AssetInfo, None]: + """Return FIGI for pair + + References: + - https://www.openfigi.com/assets/local/figi-allocation-rules.pdf + - https://www.openfigi.com/search + """ api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString={pair}&start=0' response = httpx.get(api_url) @@ -37,23 +49,23 @@ def get_asset_info(pair: str) -> AssetInfo: return pair_figi -#%% Tests +# %% Tests expected_pairs = { 'WAX-USD': None, 'ETH-USD': 'BBG00J3NBWD7', 'BTC-USD': 'BBG006FCL7J4', 'SOL-USD': 'BBG013WVY457', - 'UNI-USD': 'BBG013TZFVW3' + 'UNI-USD': 'BBG013TZFVW3', + 'SUSHI-USD': 'KKG0000010W1' } for k, v in expected_pairs.items(): - assert get_asset_info(k) == v + actual = get_figi(k) + print(actual) + assert actual == v # %% -import os -import pandas as pd - pair_names = [x[:-4] for x in os.listdir("../data")] def insert_dash(text: str, position: int) -> str: @@ -62,14 +74,44 @@ def insert_dash(text: str, position: int) -> str: else: return text -usd_pairs = [insert_dash(s.upper(), 3) for s in pair_names if "usd" in s] +usd_pairs = [ + insert_dash(s.upper(), 3) + for s in pair_names if "usd" in s +] print(usd_pairs) -# %% -pair_figi_list = [get_asset_info(p) for p in usd_pairs] -for p in usd_pairs: - print(p) - get_asset_info(p) +pair_figi_list = [get_figi(p) for p in usd_pairs] + +# %% ---- +conn_settings = ConnectionSettings(server='***.database.windows.net', database='market-data-db', username='***', password='***') +db_conn = AzureDbConnection(conn_settings) + +db_conn.connect() +for t in db_conn.get_tables(): + print(t) + + # %% +db_mapping = { + 'FIGI': types.VARCHAR(length=12), + 'open': types.DECIMAL(precision=19, scale=9), + 'high': types.DECIMAL(precision=19, scale=9), + 'close': types.DECIMAL(precision=19, scale=9), + 'low': types.DECIMAL(precision=19, scale=9), + 'volume': types.DECIMAL(precision=19, scale=9), 
+ 'time': types.DATETIME(), + 'source_id': types.SMALLINT, + 'version': types.VARCHAR(length=12), + 'interval': types.CHAR(length=2) +} + + +query = sql.text("select * from Cryptocurrency where figi = 'ustusd'") +result = db_conn._conn.execute(query).fetchall() + + +# %% +db_conn.dispose() +print('Completed') From a3a48fbcbad3b0bd980c97c37193ea58b0f1e936 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Sun, 19 Jun 2022 22:58:56 +0000 Subject: [PATCH 05/17] OpenFigi crawler draft --- src/openfigi_crawler.py | 167 ++++++++++++++++++++++++++++++++++++++++ src/openfigi_parser.py | 117 ---------------------------- 2 files changed, 167 insertions(+), 117 deletions(-) create mode 100644 src/openfigi_crawler.py delete mode 100644 src/openfigi_parser.py diff --git a/src/openfigi_crawler.py b/src/openfigi_crawler.py new file mode 100644 index 0000000..42fbc4b --- /dev/null +++ b/src/openfigi_crawler.py @@ -0,0 +1,167 @@ + +# %% Import dependencies +import os +from dataclasses import dataclass +from typing import Dict, Union + +import pandas as pd + +import httpx + +from sqlalchemy import types +from azure import AzureDbConnection, ConnectionSettings + + +# %% Data models +@dataclass +class AssetInfo: + FIGI: str + Ticker: str + Title: Union[str, None] + Description: Union[str, None] + AssetType: str = 'Cryptocurrency' + SourceId: str = 'OpenFigi API' + Version: str = 'v202206' + + def as_dict(self) -> Dict[str, str]: + return {'Figi': self.FIGI, 'Ticker': self.Ticker} + + +# %% FIGI provider +class OpenFigiProvider: + """ + OpenFigi API provider + + References: + https://www.openfigi.com/assets/local/figi-allocation-rules.pdf + https://www.openfigi.com/search + """ + @staticmethod + def _send_request(ticker: str, asset_type: str) -> pd.DataFrame: + api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22{asset_type}%22&num_rows=100&simpleSearchString={ticker}&start=0' + response = httpx.get(api_url) + + json_response = response.json() + return 
pd.DataFrame.from_dict(json_response['result'], orient='columns') + + + @staticmethod + def _find_figi(df: pd.DataFrame, field_name: str) -> Union[str, None]: + if len(df) == 0 or field_name not in df.columns: + return None + + result = df[field_name].dropna().unique() + + if (len(result) != 1): + print(f'[WARN] Multiple ({len(result)}) FIGI records was found') + return None + + return result[0] + + + @staticmethod + def _find_name(df: pd.DataFrame) -> Union[str, None]: + if len(df) == 0 or 'DS002_sd' not in df.columns: + return None + + result = df['DS002_sd'].dropna().unique() + + if (len(result) != 1): + print(f'[WARN] Multiple ({len(result)}) name records was found') + return None + + return result[0] + + + def search(self, ticker: str, asset_type: str = 'Curncy') -> Union[AssetInfo, None]: + """Return FIGI for pair""" + + response_df = OpenFigiProvider._send_request(ticker, asset_type) + + figi = OpenFigiProvider._find_figi(response_df, 'kkg_pairFIGI_sd') + + if figi is None: + base_quote = ticker.split('-')[0] + print(f'[INFO] {ticker} > Try to search using base quote {base_quote}') + + response_df = OpenFigiProvider._send_request(base_quote, asset_type) + figi = OpenFigiProvider._find_figi(response_df, 'kkg_baseAssetFigi_sd') + + if figi is None: + return None + + return AssetInfo(figi, ticker, None, None) + + +#%% +figi_provider = OpenFigiProvider() + +assert figi_provider.search('WAX-USD') == None +assert figi_provider.search('ABCD') == None + + +# %% Tests +expected_pairs = { + 'BNB-USD': 'KKG000007HZ5', + 'ETH-USD': 'BBG00J3NBWD7', + 'BTC-USD': 'BBG006FCL7J4', + 'SOL-USD': 'BBG013WVY457', + 'UNI-USD': 'BBG013TZFVW3', + 'SUSHI-USD': 'KKG0000010W1', + 'AVAX-USD': 'KKG000007J36' +} + + +for k, v in expected_pairs.items(): + actual = figi_provider.search(k) + print(actual.as_dict()) + assert ( + isinstance(actual, AssetInfo) + and actual.FIGI == v + and actual.Ticker == k + ) + + +# %% Get assets for searching figi +pair_names = [x[:-4] for x in 
os.listdir("../data")] + +def insert_dash(text: str, position: int) -> str: + if '-' not in text: + return text[:position] + '-' + text[position:] + else: + return text + +usd_pairs = [ + insert_dash(s.upper(), 3) + for s in pair_names if "usd" in s +] + +print(usd_pairs[1:10]) + + +# %% +figi_provider = OpenFigiProvider() +pair_figi_list = [figi_provider.search(p) for p in usd_pairs] + + +# %% ---- +db_conn = AzureDbConnection(conn_settings) + +db_conn.connect() +for t in db_conn.get_tables(): + print(t) + + +# %% +db_mapping = { + 'Figi': types.CHAR(length=12), + 'Ticker': types.VARCHAR(length=12) +} + +figi_df = pd.DataFrame([t.as_dict() for t in pair_figi_list if isinstance(t, AssetInfo)]) +db_conn.insert(figi_df, 'figi', db_mapping) + + +# %% +db_conn.dispose() +print('Completed') diff --git a/src/openfigi_parser.py b/src/openfigi_parser.py deleted file mode 100644 index 838dc85..0000000 --- a/src/openfigi_parser.py +++ /dev/null @@ -1,117 +0,0 @@ -# %% -import os -from dataclasses import dataclass -from typing import Literal, Union - -import pandas as pd - -import httpx - -from sqlalchemy import types, sql -from azure import AzureDbConnection, ConnectionSettings - - -# %% -@dataclass -class AssetInfo: - FIGI: str - Ticker: str - Title: Union[str, None] - Description: Union[str, None] - AssetType: Literal['Cryptocurrency'] - SourceId: Literal['OpenFigi API'] - Version: Literal['v202206'] - - -def get_figi(pair: str) -> Union[AssetInfo, None]: - """Return FIGI for pair - - References: - - https://www.openfigi.com/assets/local/figi-allocation-rules.pdf - - https://www.openfigi.com/search - """ - api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString={pair}&start=0' - response = httpx.get(api_url) - - json_response = response.json() - response_df = pd.DataFrame.from_dict(json_response['result'], orient='columns') - if len(response_df) == 0: - print(f'[WARN] {pair} not found') - return None - - 
pair_figi = response_df.kkg_pairFIGI_sd.unique() - - if (len(pair_figi) != 1): - print(f'[WARN] {len(pair_figi)} records was found for {pair}') - else: - print(f'[INFO] {pair} associated w/ FIGI {pair_figi[0]}') - - return pair_figi - - -# %% Tests -expected_pairs = { - 'WAX-USD': None, - 'ETH-USD': 'BBG00J3NBWD7', - 'BTC-USD': 'BBG006FCL7J4', - 'SOL-USD': 'BBG013WVY457', - 'UNI-USD': 'BBG013TZFVW3', - 'SUSHI-USD': 'KKG0000010W1' -} - -for k, v in expected_pairs.items(): - actual = get_figi(k) - print(actual) - assert actual == v - - -# %% -pair_names = [x[:-4] for x in os.listdir("../data")] - -def insert_dash(text: str, position: int) -> str: - if '-' not in text: - return text[:position] + '-' + text[position:] - else: - return text - -usd_pairs = [ - insert_dash(s.upper(), 3) - for s in pair_names if "usd" in s -] - -print(usd_pairs) - - -pair_figi_list = [get_figi(p) for p in usd_pairs] - -# %% ---- -conn_settings = ConnectionSettings(server='***.database.windows.net', database='market-data-db', username='***', password='***') -db_conn = AzureDbConnection(conn_settings) - -db_conn.connect() -for t in db_conn.get_tables(): - print(t) - - -# %% -db_mapping = { - 'FIGI': types.VARCHAR(length=12), - 'open': types.DECIMAL(precision=19, scale=9), - 'high': types.DECIMAL(precision=19, scale=9), - 'close': types.DECIMAL(precision=19, scale=9), - 'low': types.DECIMAL(precision=19, scale=9), - 'volume': types.DECIMAL(precision=19, scale=9), - 'time': types.DATETIME(), - 'source_id': types.SMALLINT, - 'version': types.VARCHAR(length=12), - 'interval': types.CHAR(length=2) -} - - -query = sql.text("select * from Cryptocurrency where figi = 'ustusd'") -result = db_conn._conn.execute(query).fetchall() - - -# %% -db_conn.dispose() -print('Completed') From 153fc5a230b270c1fd33dd74eb368332dbf04f4c Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Sun, 19 Jun 2022 22:59:58 +0000 Subject: [PATCH 06/17] Parser update --- src/bitfinex_crypto_parser.py | 19 ++++++++++++------- 1 file 
changed, 12 insertions(+), 7 deletions(-) diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py index c0c03e1..313cc3b 100644 --- a/src/bitfinex_crypto_parser.py +++ b/src/bitfinex_crypto_parser.py @@ -1,7 +1,6 @@ #!/usr/bin/python3 """ - Data source: https://www.kaggle.com/code/tencars/bitfinexdataset """ @@ -15,6 +14,11 @@ from azure import AzureDbConnection, ConnectionSettings # %% + +#> ~/apps/resistance/data +#> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution +#> unzip 392-crypto-currency-pairs-at-minute-resolution.zip + input_path = "../data" # Get names and number of available currency pairs @@ -28,6 +32,7 @@ print(pair_names[0:50]) usd_pairs = [s for s in pair_names if "usd" in s] print(usd_pairs) + # %% def load_data(symbol, source=input_path): @@ -46,12 +51,12 @@ def load_data(symbol, source=input_path): # %% ---- -solusd = load_data("solusd") -solusd.tail() +sample_df = load_data("ethusd") +sample_df + # %% ---- -conn_settings = ConnectionSettings(server='***.database.windows.net', database='market-data-db', username='demo', password='***') db_conn = AzureDbConnection(conn_settings) db_conn.connect() @@ -63,7 +68,7 @@ for t in db_conn.get_tables(): min_candels_n = 10000 db_mapping = { - 'FIGI': types.VARCHAR(length=12), + 'FIGI': types.CHAR(length=12), 'open': types.DECIMAL(precision=19, scale=9), 'high': types.DECIMAL(precision=19, scale=9), 'close': types.DECIMAL(precision=19, scale=9), @@ -82,14 +87,14 @@ for pair in usd_pairs: candles_df['FIGI'] = pair candles_df['time'] = candles_df.index candles_df['source_id'] = 128 - candles_df['version'] = 'v202204' + candles_df['version'] = 'v202206' candles_df['interval'] = '1M' if candles_df.shape[0] > min_candels_n: print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time']))) print(f'Starting insert {pair}...') - db_conn.insert(candles_df, 'Cryptocurrency', db_mapping) + db_conn.insert(candles_df, 'crypto', 
db_mapping) else: print(f'WARN: {pair} has only {candles_df.shape[0]} records') From b51d4a01628ed2609b8941792412dd60dafaf41e Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Sun, 19 Jun 2022 23:01:08 +0000 Subject: [PATCH 07/17] Minor updates --- src/azure.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/azure.py b/src/azure.py index 2e55dc3..05e6f96 100644 --- a/src/azure.py +++ b/src/azure.py @@ -1,16 +1,18 @@ # %% Import dependencies ---- from dataclasses import dataclass -from typing import Dict, Any, Iterable -from pandas import DataFrame +from typing import Dict, Any + from sqlalchemy import create_engine, inspect + +import pandas as pd import urllib -# %% +# %% Models @dataclass(frozen=True) class ConnectionSettings: - """Connection Settings.""" + """Connection Settings""" server: str database: str username: str @@ -19,10 +21,10 @@ class ConnectionSettings: timeout: int = 30 +# %% Connection class AzureDbConnection: - """ - Azure SQL database connection. 
- """ + """Azure SQL database connection.""" + def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None: conn_params = urllib.parse.quote_plus( 'Driver=%s;' % conn_settings.driver + @@ -36,29 +38,29 @@ class AzureDbConnection: ) conn_string = f'mssql+pyodbc:///?odbc_connect={conn_params}' - self.db = create_engine(conn_string, echo=echo) + self._db = create_engine(conn_string, echo=echo) def connect(self) -> None: - """Estimate connection.""" - self.conn = self.db.connect() + """Estimate connection""" + self._conn = self._db.connect() - def get_tables(self) -> Iterable[str]: - """Get list of tables.""" - inspector = inspect(self.db) + def get_tables(self) -> list[str]: + """Get list of tables""" + inspector = inspect(self._db) return [t for t in inspector.get_table_names()] - def insert(self, inserted_data: DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None: + def insert(self, inserted_data: pd.DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None: inserted_data.to_sql( - con=self.db, + con=self._db, schema='dbo', name=target_table, - if_exists='append', # or replace + if_exists='replace', # or append index=False, chunksize=chunksize, dtype=db_mapping ) def dispose(self) -> None: - """Dispose opened connections.""" - self.conn.close() - self.db.dispose() + """Dispose opened connections""" + self._conn.close() + self._db.dispose() From f3b515725cbf8a8d382c6d14604286865f4cc9a3 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Thu, 23 Jun 2022 12:50:00 +0000 Subject: [PATCH 08/17] Binance lab --- src/candidate_tests.ipynb | 1276 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1276 insertions(+) create mode 100644 src/candidate_tests.ipynb diff --git a/src/candidate_tests.ipynb b/src/candidate_tests.ipynb new file mode 100644 index 0000000..643e5f9 --- /dev/null +++ b/src/candidate_tests.ipynb @@ -0,0 +1,1276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# A little non-economic research\n", + "\n", + "\n", + "## Existing quiz tests\n", + "\n", + "https://www.linkedin.com/skill-assessments/hub/quizzes/ \n", + "\n", + "![](../docs/li.png)\n", + "\n", + "https://www.w3schools.com/quiztest/quiztest.asp?qtest=PANDAS\n", + "\n", + "\n", + "![](../docs/w3.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code interview\n", + "\n", + "Datasets:\n", + "\n", + "1. Binance Open Data: [spot candles](https://github.com/binance/binance-public-data/#klines)\n", + "2. OpenFIGI: [search API](https://www.openfigi.com/search).\n", + "\n", + "### Binance Open Data\n", + "\n", + "Downloading candles for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell`:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mkdir: cannot create directory ‘../data/binance’: File exists\n", + "--2022-06-23 12:48:13-- https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "Resolving data.binance.vision (data.binance.vision)... 13.224.2.90, 13.224.2.55, 13.224.2.128, ...\n", + "Connecting to data.binance.vision (data.binance.vision)|13.224.2.90|:443... connected.\n", + "HTTP request sent, awaiting response... 304 Not Modified\n", + "File ‘../data/binance/BTCUSDT-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", + "\n", + "--2022-06-23 12:48:14-- https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "Resolving data.binance.vision (data.binance.vision)... 13.224.2.90, 13.224.2.55, 13.224.2.128, ...\n", + "Connecting to data.binance.vision (data.binance.vision)|13.224.2.90|:443... connected.\n", + "HTTP request sent, awaiting response... 304 Not Modified\n", + "File ‘../data/binance/BTCUSDC-1m-2022-06-21.zip’ not modified on server. 
Omitting download.\n", + "\n", + "Archive: ../data/binance/BTCUSDT-1m-2022-06-21.zip\n", + " inflating: ../data/binance/BTCUSDT-1m-2022-06-21.csv \n", + "Archive: ../data/binance/BTCUSDC-1m-2022-06-21.zip\n", + " inflating: ../data/binance/BTCUSDC-1m-2022-06-21.csv \n" + ] + } + ], + "source": [ + "#!/bin/sh\n", + "\n", + "# create dir for data\n", + "!mkdir ../data/binance\n", + "\n", + "# download data using GET request\n", + "!wget -N -P ../data/binance https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "!wget -N -P../data/binance https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "\n", + "# unzip\n", + "!unzip -o -d ../data/binance ../data/binance/BTCUSDT-1m-2022-06-21.zip \n", + "!unzip -o -d ../data/binance ../data/binance/BTCUSDC-1m-2022-06-21.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import packages for data analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import httpx\n", + "\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read data from CSV file to Pandas DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234567891011
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 \\\n", + "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", + "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", + "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", + "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", + "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", + "\n", + " 6 7 8 9 10 11 \n", + "0 1655769659999 1.455321e+06 1150 37.36821 7.683845e+05 0 \n", + "1 1655769719999 2.430514e+06 1402 61.25760 1.260950e+06 0 \n", + "2 1655769779999 2.686026e+06 1433 55.80573 1.149409e+06 0 \n", + "3 1655769839999 2.128819e+06 1301 64.57346 1.327338e+06 0 \n", + "4 1655769899999 1.717907e+06 1098 36.40944 7.485065e+05 0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_data(pair: str) -> pd.DataFrame:\n", + " return pd.read_csv(f'../data/binance/{pair}-1m-2022-06-21.csv', header = None)\n", + "\n", + "btcusdt_df = get_data('BTCUSDT')\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set names to columns with 1m candles:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", + "
" + ], + "text/plain": [ + " Open_time Open High Low Close Volume \\\n", + "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", + "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", + "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", + "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", + "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", + "\n", + " Close_time Quote_asset_volume Number_of_trades \\\n", + "0 1655769659999 1.455321e+06 1150 \n", + "1 1655769719999 2.430514e+06 1402 \n", + "2 1655769779999 2.686026e+06 1433 \n", + "3 1655769839999 2.128819e+06 1301 \n", + "4 1655769899999 1.717907e+06 1098 \n", + "\n", + " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", + "0 37.36821 7.683845e+05 0 \n", + "1 61.25760 1.260950e+06 0 \n", + "2 55.80573 1.149409e+06 0 \n", + "3 64.57346 1.327338e+06 0 \n", + "4 36.40944 7.485065e+05 0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n", + " column_names_mapping = {\n", + " 0: 'Open_time',\n", + " 1: 'Open',\n", + " 2: 'High',\n", + " 3: 'Low',\n", + " 4: 'Close',\n", + " 5: 'Volume',\n", + " 6: 'Close_time',\n", + " 7: 'Quote_asset_volume',\n", + " 8: 'Number_of_trades',\n", + " 9: 'Taker_buy_base_asset_volume',\n", + " 10: 'Taker_buy_quote_asset_volume',\n", + " 11: 'Ignore'\n", + " }\n", + " return df.rename(columns=column_names_mapping)\n", + "\n", + "btcusdt_df = set_column_names(btcusdt_df)\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert timestamp to human-readable date and time format:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "btcusdt_df['Close_time'] = 
btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find min and max time:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Open_timeClose_time
min2022-06-21 00:00:002022-06-21 00:00:59.999000
max2022-06-21 23:59:002022-06-21 23:59:59.999000
len14401440
\n", + "
" + ], + "text/plain": [ + " Open_time Close_time\n", + "min 2022-06-21 00:00:00 2022-06-21 00:00:59.999000\n", + "max 2022-06-21 23:59:00 2022-06-21 23:59:59.999000\n", + "len 1440 1440" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "btcusdt_df[['Open_time', 'Close_time']].aggregate(func=[min, max, len])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate 1-hour `OHLCV` candles:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeClose_time
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999
\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Close_time\n", + "0 20573.90 20705.74 20396.77 20672.30 4235.77392 2022-06-21 00:59:59.999\n", + "1 20672.30 20783.99 20504.89 20670.88 3892.91412 2022-06-21 01:59:59.999\n", + "2 20670.87 20699.93 20348.40 20433.49 2876.88928 2022-06-21 02:59:59.999\n", + "3 20433.49 20665.26 20365.00 20614.04 3306.77018 2022-06-21 03:59:59.999\n", + "4 20614.04 20740.72 20474.41 20656.17 2925.33542 2022-06-21 04:59:59.999\n", + "5 20656.17 21029.93 20621.14 20890.77 6516.65611 2022-06-21 05:59:59.999\n", + "6 20890.77 21202.00 20890.77 21192.08 6114.51662 2022-06-21 06:59:59.999\n", + "7 21192.08 21333.00 20952.46 21120.28 5433.08603 2022-06-21 07:59:59.999\n", + "8 21120.28 21500.01 21051.25 21356.32 6241.36801 2022-06-21 08:59:59.999\n", + "9 21356.32 21470.00 21166.94 21200.00 4961.57496 2022-06-21 09:59:59.999\n", + "10 21200.01 21307.83 21061.54 21299.27 3982.47578 2022-06-21 10:59:59.999\n", + "11 21299.02 21361.03 20911.13 20962.42 4685.42524 2022-06-21 11:59:59.999\n", + "12 20962.42 21163.24 20853.36 21106.20 5596.26303 2022-06-21 12:59:59.999\n", + "13 21106.19 21650.00 20995.64 21619.31 8667.59080 2022-06-21 13:59:59.999\n", + "14 21619.31 21723.00 21427.82 21590.79 6295.02429 2022-06-21 14:59:59.999\n", + "15 21590.79 21604.27 21339.07 21392.19 3809.54622 2022-06-21 15:59:59.999\n", + "16 21392.18 21550.39 21355.77 21401.52 2421.77629 2022-06-21 16:59:59.999\n", + "17 21401.52 21457.82 21195.70 21242.94 3755.82919 2022-06-21 17:59:59.999\n", + "18 21242.93 21256.99 21076.48 21100.01 2820.40675 2022-06-21 18:59:59.999\n", + "19 21100.00 21306.51 20870.01 20888.64 4015.33528 2022-06-21 19:59:59.999\n", + "20 20888.63 20987.38 20666.00 20859.86 4442.87596 2022-06-21 20:59:59.999\n", + "21 20859.86 21054.99 20808.00 20972.91 1813.56236 2022-06-21 21:59:59.999\n", + "22 20972.91 21003.70 20741.03 20897.00 2945.61650 2022-06-21 22:59:59.999\n", + "23 20897.00 20943.17 20551.00 20723.52 2613.77441 2022-06-21 
23:59:59.999" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n", + " df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n", + "\n", + " return (\n", + " df\n", + " .groupby(['hour'])\n", + " .agg(\n", + " {\n", + " 'Open': 'first',\n", + " 'High': max,\n", + " 'Low': min,\n", + " 'Close': 'last',\n", + " 'Volume': sum,\n", + " 'Close_time': max\n", + " }\n", + " )\n", + " .reset_index()\n", + " .drop(columns=['hour'])\n", + " )\n", + "\n", + "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", + "btcusdt_1h_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Validate results:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "assert(\n", + " isinstance(btcusdt_1h_df, pd.DataFrame)\n", + " and btcusdt_1h_df.shape == (24, 6)\n", + " and not btcusdt_1h_df.isnull().any().any()\n", + " and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do the same for `BTC/USDC` pair:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeClose_time
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999
\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume Close_time\n", + "0 20549.65 20703.08 20371.75 20647.35 284.73521 2022-06-21 00:59:59.999\n", + "1 20648.06 20771.07 20482.83 20646.88 192.50513 2022-06-21 01:59:59.999\n", + "2 20644.78 20672.98 20331.36 20406.71 195.67023 2022-06-21 02:59:59.999\n", + "3 20402.27 20649.99 20331.48 20585.98 290.39210 2022-06-21 03:59:59.999\n", + "4 20586.63 20721.10 20449.57 20632.33 205.78459 2022-06-21 04:59:59.999\n", + "5 20640.33 20999.00 20594.58 20866.07 412.42477 2022-06-21 05:59:59.999\n", + "6 20873.25 21178.00 20873.25 21169.64 524.25894 2022-06-21 06:59:59.999\n", + "7 21166.58 21300.00 20913.40 21097.44 304.65113 2022-06-21 07:59:59.999\n", + "8 21097.20 21471.91 21023.64 21327.56 366.29201 2022-06-21 08:59:59.999\n", + "9 21342.76 21448.22 21144.24 21182.12 350.77543 2022-06-21 09:59:59.999\n", + "10 21175.75 21298.52 21038.30 21270.98 526.66602 2022-06-21 10:59:59.999\n", + "11 21273.73 21336.94 20887.17 20948.96 579.00200 2022-06-21 11:59:59.999\n", + "12 20936.59 21143.99 20800.00 21079.42 452.07214 2022-06-21 12:59:59.999\n", + "13 21079.41 21629.36 20968.21 21592.69 507.91190 2022-06-21 13:59:59.999\n", + "14 21595.13 21699.98 21394.05 21572.00 445.73978 2022-06-21 14:59:59.999\n", + "15 21571.96 21671.34 21314.94 21370.87 435.99237 2022-06-21 15:59:59.999\n", + "16 21370.08 21533.14 21330.00 21377.52 243.10368 2022-06-21 16:59:59.999\n", + "17 21377.85 21434.58 21168.59 21220.14 291.46137 2022-06-21 17:59:59.999\n", + "18 21220.14 21233.94 21054.72 21074.20 423.05836 2022-06-21 18:59:59.999\n", + "19 21074.81 21279.52 20851.10 20866.39 266.46488 2022-06-21 19:59:59.999\n", + "20 20864.31 20960.98 20645.03 20838.52 330.79569 2022-06-21 20:59:59.999\n", + "21 20838.51 21057.01 20780.29 20958.22 99.09836 2022-06-21 21:59:59.999\n", + "22 20950.07 20975.92 20719.02 20875.37 177.08203 2022-06-21 22:59:59.999\n", + "23 20880.71 20916.85 20527.90 20699.78 173.22797 2022-06-21 23:59:59.999" + ] + }, + 
"execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "btcusdc_df = get_data('BTCUSDC')\n", + "btcusdc_df = set_column_names(btcusdc_df)\n", + "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "\n", + "btcusdc_1h_df = calculate_ohclv(btcusdc_df)\n", + "btcusdc_1h_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Join altogether:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", + "btcusdc_1h_df['pair'] = 'BTC-USDC'\n", + "\n", + "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", + "\n", + "assert(\n", + " isinstance(candles_1h_df, pd.DataFrame)\n", + " and candles_1h_df.shape == (48, 7)\n", + " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot something interesting... 
:bulb:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enrich dataset using Open FIGI API Interaction" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "JSONDecodeError", + "evalue": "Expecting value: line 1 column 1 (char 0)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/dictator/apps/resistance/src/candidate.ipynb Cell 24'\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m json_response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mjson()\n\u001b[1;32m 7\u001b[0m \u001b[39mreturn\u001b[39;00m pd\u001b[39m.\u001b[39mDataFrame\u001b[39m.\u001b[39mfrom_dict(json_response[\u001b[39m'\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m'\u001b[39m], orient\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> 9\u001b[0m send_request(\u001b[39m'\u001b[39;49m\u001b[39mBTCUSDT\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", + "\u001b[1;32m/home/dictator/apps/resistance/src/candidate.ipynb Cell 24'\u001b[0m in \u001b[0;36msend_request\u001b[0;34m(ticker)\u001b[0m\n\u001b[1;32m 2\u001b[0m api_url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mhttps://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString=\u001b[39m\u001b[39m{\u001b[39;00mticker\u001b[39m}\u001b[39;00m\u001b[39m&start=0\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 4\u001b[0m response \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mget(api_url)\n\u001b[0;32m----> 5\u001b[0m json_response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39;49mjson()\n\u001b[1;32m 7\u001b[0m \u001b[39mreturn\u001b[39;00m 
pd\u001b[39m.\u001b[39mDataFrame\u001b[39m.\u001b[39mfrom_dict(json_response[\u001b[39m'\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m'\u001b[39m], orient\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m'\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/httpx/_models.py:1517\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 1515\u001b[0m \u001b[39mif\u001b[39;00m encoding \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m jsonlib\u001b[39m.\u001b[39mloads(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontent\u001b[39m.\u001b[39mdecode(encoding), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m-> 1517\u001b[0m \u001b[39mreturn\u001b[39;00m jsonlib\u001b[39m.\u001b[39;49mloads(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtext, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 341\u001b[0m s \u001b[39m=\u001b[39m s\u001b[39m.\u001b[39mdecode(detect_encoding(s), \u001b[39m'\u001b[39m\u001b[39msurrogatepass\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 343\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_hook \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m parse_float \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_pairs_hook 
\u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[39mreturn\u001b[39;00m _default_decoder\u001b[39m.\u001b[39;49mdecode(s)\n\u001b[1;32m 347\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 348\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m JSONDecoder\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecode\u001b[39m(\u001b[39mself\u001b[39m, s, _w\u001b[39m=\u001b[39mWHITESPACE\u001b[39m.\u001b[39mmatch):\n\u001b[1;32m 333\u001b[0m \u001b[39m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[39m containing a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mraw_decode(s, idx\u001b[39m=\u001b[39;49m_w(s, \u001b[39m0\u001b[39;49m)\u001b[39m.\u001b[39;49mend())\n\u001b[1;32m 338\u001b[0m end \u001b[39m=\u001b[39m _w(s, end)\u001b[39m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[39mif\u001b[39;00m end \u001b[39m!=\u001b[39m \u001b[39mlen\u001b[39m(s):\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m 354\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[39mraise\u001b[39;00m JSONDecodeError(\u001b[39m\"\u001b[39m\u001b[39mExpecting 
value\u001b[39m\u001b[39m\"\u001b[39m, s, err\u001b[39m.\u001b[39mvalue) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39m\n\u001b[1;32m 356\u001b[0m \u001b[39mreturn\u001b[39;00m obj, end\n", + "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" + ] + } + ], + "source": [ + "def send_request(ticker: str) -> pd.DataFrame:\n", + " api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString={ticker}&start=0'\n", + "\n", + " response = httpx.get(api_url)\n", + " json_response = response.json()\n", + " \n", + " return pd.DataFrame.from_dict(json_response['result'], orient='columns')\n", + "\n", + "send_request('BTCUSDT')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c47f8bf8c3ef896b0dfadf1a93b734d1bf7464e9 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Fri, 24 Jun 2022 11:39:23 +0000 Subject: [PATCH 09/17] Update test --- src/candidate_tests.ipynb | 1144 +++++++++++++++++++++++++++++++++---- 1 file changed, 1022 insertions(+), 122 deletions(-) diff --git a/src/candidate_tests.ipynb b/src/candidate_tests.ipynb index 643e5f9..4b9e15e 100644 --- a/src/candidate_tests.ipynb +++ b/src/candidate_tests.ipynb @@ -4,40 +4,40 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# A little non-economic research\n", + "# Analyst Code Interview\n", "\n", + "Hi,\n", "\n", - "## Existing quiz tests\n", + "It's a simple code interview. 
It shouldn't take you more than an hour, and it's fun :)\n", + "\n", + "## Part I. Quizzes\n", + "\n", + "If you don't have a `Python badge` on LinkedIn yet, then it's time [to get one](https://www.linkedin.com/skill-assessments/hub/quizzes/)! Please share your results with us. By the way, here's my badge.\n", "\n", "![](../docs/li.png)\n", "\n", - "https://www.w3schools.com/quiztest/quiztest.asp?qtest=PANDAS\n", - "\n", - "\n", - "![](../docs/w3.png)\n" + "If you don't have a LinkedIn account or like quizzes, then take another test from [W3Schools](https://www.w3schools.com/quiztest/quiztest.asp?qtest=PANDAS).\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Code interview\n", + "## Part II. Coding\n", "\n", - "Datasets:\n", + "Please share the screen with us and let's repair our program together. \n", + "You can use google.com, StackOverflow, or your favorite IDE. Please use a version of Python greater than or equal to 3.8.\n", "\n", - "1. Binance Open Data: [spot candles](https://github.com/binance/binance-public-data/#klines)\n", - "2. OpenFIGI: [search API](https://www.openfigi.com/search).\n", + "We intend to collect data from [Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n", "\n", - "### Binance Open Data\n", + "### Step 1. 
Download data \n", + "\n", - "Downloading candles for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell`:" + "Downloading __1-minute candles__ for `BTC/USDT` and `BTC/USDC` using `bash` or `powershell` scripts:" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "vscode": { "languageId": "shellscript" @@ -48,23 +48,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "mkdir: cannot create directory ‘../data/binance’: File exists\n", - "--2022-06-23 12:48:13-- https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 13.224.2.90, 13.224.2.55, 13.224.2.128, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|13.224.2.90|:443... connected.\n", + "mkdir: cannot create directory ‘../data’: File exists\n", + "--2022-06-24 11:22:56-- https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "Resolving data.binance.vision (data.binance.vision)... 108.138.94.79, 108.138.94.48, 108.138.94.67, ...\n", + "Connecting to data.binance.vision (data.binance.vision)|108.138.94.79|:443... connected.\n", "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/binance/BTCUSDT-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", + "File ‘../data/BTCUSDT-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", "\n", - "--2022-06-23 12:48:14-- https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 13.224.2.90, 13.224.2.55, 13.224.2.128, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|13.224.2.90|:443... connected.\n", + "--2022-06-24 11:22:57-- https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "Resolving data.binance.vision (data.binance.vision)... 
108.138.94.88, 108.138.94.67, 108.138.94.48, ...\n", + "Connecting to data.binance.vision (data.binance.vision)|108.138.94.88|:443... connected.\n", "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/binance/BTCUSDC-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", + "File ‘../data/BTCUSDC-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", "\n", - "Archive: ../data/binance/BTCUSDT-1m-2022-06-21.zip\n", - " inflating: ../data/binance/BTCUSDT-1m-2022-06-21.csv \n", - "Archive: ../data/binance/BTCUSDC-1m-2022-06-21.zip\n", - " inflating: ../data/binance/BTCUSDC-1m-2022-06-21.csv \n" + "Archive: ../data/BTCUSDT-1m-2022-06-21.zip\n", + " inflating: ../data/BTCUSDT-1m-2022-06-21.csv \n", + "Archive: ../data/BTCUSDC-1m-2022-06-21.zip\n", + " inflating: ../data/BTCUSDC-1m-2022-06-21.csv \n" ] } ], @@ -72,27 +72,29 @@ "#!/bin/sh\n", "\n", "# create dir for data\n", - "!mkdir ../data/binance\n", + "!mkdir ../data\n", "\n", "# download data using GET request\n", - "!wget -N -P ../data/binance https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "!wget -N -P../data/binance https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", "\n", "# unzip\n", - "!unzip -o -d ../data/binance ../data/binance/BTCUSDT-1m-2022-06-21.zip \n", - "!unzip -o -d ../data/binance ../data/binance/BTCUSDC-1m-2022-06-21.zip" + "!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n", + "!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Import packages for data analysis" + "### Step 2: Import data to Dataframe \n", + "\n", + "Import packages for data analysis:" ] }, { 
"cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -108,12 +110,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Read data from CSV file to Pandas DataFrame:" + "Import data from CSV file to Pandas DataFrame:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -247,7 +249,7 @@ "4 1655769899999 1.717907e+06 1098 36.40944 7.485065e+05 0 " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -264,12 +266,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Set names to columns with 1m candles:" + "Set names to columns:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -410,7 +412,7 @@ "4 36.40944 7.485065e+05 0 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -446,24 +448,7 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", - "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Find min and max time:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -488,55 +473,369 @@ " \n", " \n", " Open_time\n", + " Open\n", + " High\n", + " Low\n", + " Close\n", + " Volume\n", " Close_time\n", + " Quote_asset_volume\n", + " Number_of_trades\n", + " Taker_buy_base_asset_volume\n", + " Taker_buy_quote_asset_volume\n", + " Ignore\n", " \n", " \n", " \n", " \n", - " min\n", + " 0\n", " 2022-06-21 00:00:00\n", - " 2022-06-21 00:00:59.999000\n", + " 20573.90\n", + " 20590.00\n", + " 20552.17\n", + " 20558.36\n", + " 70.76925\n", + " 
2022-06-21 00:00:59.999\n", + " 1.455321e+06\n", + " 1150\n", + " 37.36821\n", + " 7.683845e+05\n", + " 0\n", " \n", " \n", - " max\n", - " 2022-06-21 23:59:00\n", - " 2022-06-21 23:59:59.999000\n", + " 1\n", + " 2022-06-21 00:01:00\n", + " 20558.35\n", + " 20611.21\n", + " 20558.35\n", + " 20606.70\n", + " 118.06032\n", + " 2022-06-21 00:01:59.999\n", + " 2.430514e+06\n", + " 1402\n", + " 61.25760\n", + " 1.260950e+06\n", + " 0\n", " \n", " \n", - " len\n", - " 1440\n", - " 1440\n", + " 2\n", + " 2022-06-21 00:02:00\n", + " 20606.69\n", + " 20626.89\n", + " 20552.40\n", + " 20552.40\n", + " 130.42894\n", + " 2022-06-21 00:02:59.999\n", + " 2.686026e+06\n", + " 1433\n", + " 55.80573\n", + " 1.149409e+06\n", + " 0\n", + " \n", + " \n", + " 3\n", + " 2022-06-21 00:03:00\n", + " 20552.41\n", + " 20585.69\n", + " 20539.09\n", + " 20578.89\n", + " 103.56318\n", + " 2022-06-21 00:03:59.999\n", + " 2.128819e+06\n", + " 1301\n", + " 64.57346\n", + " 1.327338e+06\n", + " 0\n", + " \n", + " \n", + " 4\n", + " 2022-06-21 00:04:00\n", + " 20578.89\n", + " 20579.90\n", + " 20537.57\n", + " 20554.46\n", + " 83.55509\n", + " 2022-06-21 00:04:59.999\n", + " 1.717907e+06\n", + " 1098\n", + " 36.40944\n", + " 7.485065e+05\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Open_time Close_time\n", - "min 2022-06-21 00:00:00 2022-06-21 00:00:59.999000\n", - "max 2022-06-21 23:59:00 2022-06-21 23:59:59.999000\n", - "len 1440 1440" + " Open_time Open High Low Close Volume \\\n", + "0 2022-06-21 00:00:00 20573.90 20590.00 20552.17 20558.36 70.76925 \n", + "1 2022-06-21 00:01:00 20558.35 20611.21 20558.35 20606.70 118.06032 \n", + "2 2022-06-21 00:02:00 20606.69 20626.89 20552.40 20552.40 130.42894 \n", + "3 2022-06-21 00:03:00 20552.41 20585.69 20539.09 20578.89 103.56318 \n", + "4 2022-06-21 00:04:00 20578.89 20579.90 20537.57 20554.46 83.55509 \n", + "\n", + " Close_time Quote_asset_volume Number_of_trades \\\n", + "0 2022-06-21 00:00:59.999 1.455321e+06 1150 \n", + "1 
2022-06-21 00:01:59.999 2.430514e+06 1402 \n", + "2 2022-06-21 00:02:59.999 2.686026e+06 1433 \n", + "3 2022-06-21 00:03:59.999 2.128819e+06 1301 \n", + "4 2022-06-21 00:04:59.999 1.717907e+06 1098 \n", + "\n", + " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", + "0 37.36821 7.683845e+05 0 \n", + "1 61.25760 1.260950e+06 0 \n", + "2 55.80573 1.149409e+06 0 \n", + "3 64.57346 1.327338e+06 0 \n", + "4 36.40944 7.485065e+05 0 " ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "btcusdt_df[['Open_time', 'Close_time']].aggregate(func=[min, max, len])" + "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "\n", + "btcusdt_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Calculate 1-hour `OHLCV` candles:" + "Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
count14401440.0000001440.0000001440.0000001440.0000001440.00000014401.440000e+031440.0000001440.0000001.440000e+031440.0
mean2022-06-21 11:59:3021014.35546521033.87731920996.87103521014.45808372.4794352022-06-21 12:00:29.9990003201.526793e+061052.26458336.2452087.635009e+050.0
min2022-06-21 00:00:0020377.84000020396.83000020348.40000020377.8500005.3914202022-06-21 00:00:59.9990001.128460e+05227.0000001.9779604.127662e+040.0
25%2022-06-21 05:59:4520737.96500020763.56750020713.95500020737.97000034.1392272022-06-21 06:00:44.9990000647.153174e+05636.00000015.9056503.315046e+050.0
50%2022-06-21 11:59:3021073.58500021095.07500021052.21000021073.59000052.6157302022-06-21 12:00:29.9990000641.100389e+06859.00000024.9484955.249121e+050.0
75%2022-06-21 17:59:1521241.47000021260.69000021224.11500021241.46750082.0582072022-06-21 18:00:14.9990000641.718243e+061225.25000039.8596558.356794e+050.0
max2022-06-21 23:59:0021691.55000021723.00000021631.91000021691.550000732.1814002022-06-21 23:59:59.9990001.571181e+078776.000000471.9332101.013001e+070.0
stdNaN325.066922325.563767324.575912324.95290871.230479NaN1.511238e+06736.29048041.5594218.819089e+050.0
\n", + "
" + ], + "text/plain": [ + " Open_time Open High Low \\\n", + "count 1440 1440.000000 1440.000000 1440.000000 \n", + "mean 2022-06-21 11:59:30 21014.355465 21033.877319 20996.871035 \n", + "min 2022-06-21 00:00:00 20377.840000 20396.830000 20348.400000 \n", + "25% 2022-06-21 05:59:45 20737.965000 20763.567500 20713.955000 \n", + "50% 2022-06-21 11:59:30 21073.585000 21095.075000 21052.210000 \n", + "75% 2022-06-21 17:59:15 21241.470000 21260.690000 21224.115000 \n", + "max 2022-06-21 23:59:00 21691.550000 21723.000000 21631.910000 \n", + "std NaN 325.066922 325.563767 324.575912 \n", + "\n", + " Close Volume Close_time \\\n", + "count 1440.000000 1440.000000 1440 \n", + "mean 21014.458083 72.479435 2022-06-21 12:00:29.999000320 \n", + "min 20377.850000 5.391420 2022-06-21 00:00:59.999000 \n", + "25% 20737.970000 34.139227 2022-06-21 06:00:44.999000064 \n", + "50% 21073.590000 52.615730 2022-06-21 12:00:29.999000064 \n", + "75% 21241.467500 82.058207 2022-06-21 18:00:14.999000064 \n", + "max 21691.550000 732.181400 2022-06-21 23:59:59.999000 \n", + "std 324.952908 71.230479 NaN \n", + "\n", + " Quote_asset_volume Number_of_trades Taker_buy_base_asset_volume \\\n", + "count 1.440000e+03 1440.000000 1440.000000 \n", + "mean 1.526793e+06 1052.264583 36.245208 \n", + "min 1.128460e+05 227.000000 1.977960 \n", + "25% 7.153174e+05 636.000000 15.905650 \n", + "50% 1.100389e+06 859.000000 24.948495 \n", + "75% 1.718243e+06 1225.250000 39.859655 \n", + "max 1.571181e+07 8776.000000 471.933210 \n", + "std 1.511238e+06 736.290480 41.559421 \n", + "\n", + " Taker_buy_quote_asset_volume Ignore \n", + "count 1.440000e+03 1440.0 \n", + "mean 7.635009e+05 0.0 \n", + "min 4.127662e+04 0.0 \n", + "25% 3.315046e+05 0.0 \n", + "50% 5.249121e+05 0.0 \n", + "75% 8.356794e+05 0.0 \n", + "max 1.013001e+07 0.0 \n", + "std 8.819089e+05 0.0 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"btcusdt_df.describe(datetime_is_numeric=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Transform data\n", + "\n", + "Calculate __1-hour OHLCV__ candles:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -817,7 +1116,7 @@ "23 20897.00 20943.17 20551.00 20723.52 2613.77441 2022-06-21 23:59:59.999" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -844,6 +1143,7 @@ " )\n", "\n", "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", + "\n", "btcusdt_1h_df" ] }, @@ -851,12 +1151,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Validate results:" + "Data validation is very important. Let's write domain-driven asserts:" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -872,12 +1172,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Do the same for `BTC/USDC` pair:" + "### Step 3: Expand the dataset with information about `BTC/USDC` \n", + "\n", + "Download `BTC/USDC` 1-minute candles and transform it to 1-hour candles:" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1158,17 +1460,17 @@ "23 20880.71 20916.85 20527.90 20699.78 173.22797 2022-06-21 23:59:59.999" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "btcusdc_df = get_data('BTCUSDC')\n", - "btcusdc_df = set_column_names(btcusdc_df)\n", - "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "btcusdc_df = get_data('BTCUSDC') # download data\n", + "btcusdc_df = set_column_names(btcusdc_df) # set column names\n", + "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n", "\n", - "btcusdc_1h_df = 
calculate_ohclv(btcusdc_df)\n", + "btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n", "btcusdc_1h_df" ] }, @@ -1181,68 +1483,666 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolumeClose_timepair
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999BTC-USDT
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999BTC-USDC
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999BTC-USDC
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999BTC-USDT
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999BTC-USDT
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999BTC-USDC
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999BTC-USDT
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999BTC-USDC
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999BTC-USDT
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999BTC-USDC
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999BTC-USDT
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999BTC-USDC
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999BTC-USDT
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999BTC-USDC
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999BTC-USDT
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999BTC-USDC
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999BTC-USDC
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999BTC-USDT
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999BTC-USDC
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999BTC-USDT
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999BTC-USDC
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999BTC-USDT
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999BTC-USDC
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999BTC-USDT
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999BTC-USDC
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999BTC-USDT
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999BTC-USDC
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999BTC-USDT
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999BTC-USDT
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999BTC-USDC
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999BTC-USDC
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999BTC-USDT
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999BTC-USDC
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999BTC-USDT
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999BTC-USDC
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999BTC-USDT
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999BTC-USDT
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999BTC-USDC
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999BTC-USDC
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999BTC-USDT
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999BTC-USDC
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999BTC-USDT
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999BTC-USDC
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999BTC-USDT
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999BTC-USDC
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999BTC-USDT
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999BTC-USDT
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999BTC-USDC
\n", + "
" + ], + "text/plain": [ + " Open High Low Close Volume \\\n", + "0 20573.90 20705.74 20396.77 20672.30 4235.77392 \n", + "0 20549.65 20703.08 20371.75 20647.35 284.73521 \n", + "1 20648.06 20771.07 20482.83 20646.88 192.50513 \n", + "1 20672.30 20783.99 20504.89 20670.88 3892.91412 \n", + "2 20670.87 20699.93 20348.40 20433.49 2876.88928 \n", + "2 20644.78 20672.98 20331.36 20406.71 195.67023 \n", + "3 20433.49 20665.26 20365.00 20614.04 3306.77018 \n", + "3 20402.27 20649.99 20331.48 20585.98 290.39210 \n", + "4 20614.04 20740.72 20474.41 20656.17 2925.33542 \n", + "4 20586.63 20721.10 20449.57 20632.33 205.78459 \n", + "5 20656.17 21029.93 20621.14 20890.77 6516.65611 \n", + "5 20640.33 20999.00 20594.58 20866.07 412.42477 \n", + "6 20890.77 21202.00 20890.77 21192.08 6114.51662 \n", + "6 20873.25 21178.00 20873.25 21169.64 524.25894 \n", + "7 21192.08 21333.00 20952.46 21120.28 5433.08603 \n", + "7 21166.58 21300.00 20913.40 21097.44 304.65113 \n", + "8 21097.20 21471.91 21023.64 21327.56 366.29201 \n", + "8 21120.28 21500.01 21051.25 21356.32 6241.36801 \n", + "9 21342.76 21448.22 21144.24 21182.12 350.77543 \n", + "9 21356.32 21470.00 21166.94 21200.00 4961.57496 \n", + "10 21175.75 21298.52 21038.30 21270.98 526.66602 \n", + "10 21200.01 21307.83 21061.54 21299.27 3982.47578 \n", + "11 21273.73 21336.94 20887.17 20948.96 579.00200 \n", + "11 21299.02 21361.03 20911.13 20962.42 4685.42524 \n", + "12 20936.59 21143.99 20800.00 21079.42 452.07214 \n", + "12 20962.42 21163.24 20853.36 21106.20 5596.26303 \n", + "13 21079.41 21629.36 20968.21 21592.69 507.91190 \n", + "13 21106.19 21650.00 20995.64 21619.31 8667.59080 \n", + "14 21619.31 21723.00 21427.82 21590.79 6295.02429 \n", + "14 21595.13 21699.98 21394.05 21572.00 445.73978 \n", + "15 21571.96 21671.34 21314.94 21370.87 435.99237 \n", + "15 21590.79 21604.27 21339.07 21392.19 3809.54622 \n", + "16 21370.08 21533.14 21330.00 21377.52 243.10368 \n", + "16 21392.18 21550.39 21355.77 21401.52 2421.77629 \n", + 
"17 21377.85 21434.58 21168.59 21220.14 291.46137 \n", + "17 21401.52 21457.82 21195.70 21242.94 3755.82919 \n", + "18 21242.93 21256.99 21076.48 21100.01 2820.40675 \n", + "18 21220.14 21233.94 21054.72 21074.20 423.05836 \n", + "19 21074.81 21279.52 20851.10 20866.39 266.46488 \n", + "19 21100.00 21306.51 20870.01 20888.64 4015.33528 \n", + "20 20864.31 20960.98 20645.03 20838.52 330.79569 \n", + "20 20888.63 20987.38 20666.00 20859.86 4442.87596 \n", + "21 20838.51 21057.01 20780.29 20958.22 99.09836 \n", + "21 20859.86 21054.99 20808.00 20972.91 1813.56236 \n", + "22 20950.07 20975.92 20719.02 20875.37 177.08203 \n", + "22 20972.91 21003.70 20741.03 20897.00 2945.61650 \n", + "23 20897.00 20943.17 20551.00 20723.52 2613.77441 \n", + "23 20880.71 20916.85 20527.90 20699.78 173.22797 \n", + "\n", + " Close_time pair \n", + "0 2022-06-21 00:59:59.999 BTC-USDT \n", + "0 2022-06-21 00:59:59.999 BTC-USDC \n", + "1 2022-06-21 01:59:59.999 BTC-USDC \n", + "1 2022-06-21 01:59:59.999 BTC-USDT \n", + "2 2022-06-21 02:59:59.999 BTC-USDT \n", + "2 2022-06-21 02:59:59.999 BTC-USDC \n", + "3 2022-06-21 03:59:59.999 BTC-USDT \n", + "3 2022-06-21 03:59:59.999 BTC-USDC \n", + "4 2022-06-21 04:59:59.999 BTC-USDT \n", + "4 2022-06-21 04:59:59.999 BTC-USDC \n", + "5 2022-06-21 05:59:59.999 BTC-USDT \n", + "5 2022-06-21 05:59:59.999 BTC-USDC \n", + "6 2022-06-21 06:59:59.999 BTC-USDT \n", + "6 2022-06-21 06:59:59.999 BTC-USDC \n", + "7 2022-06-21 07:59:59.999 BTC-USDT \n", + "7 2022-06-21 07:59:59.999 BTC-USDC \n", + "8 2022-06-21 08:59:59.999 BTC-USDC \n", + "8 2022-06-21 08:59:59.999 BTC-USDT \n", + "9 2022-06-21 09:59:59.999 BTC-USDC \n", + "9 2022-06-21 09:59:59.999 BTC-USDT \n", + "10 2022-06-21 10:59:59.999 BTC-USDC \n", + "10 2022-06-21 10:59:59.999 BTC-USDT \n", + "11 2022-06-21 11:59:59.999 BTC-USDC \n", + "11 2022-06-21 11:59:59.999 BTC-USDT \n", + "12 2022-06-21 12:59:59.999 BTC-USDC \n", + "12 2022-06-21 12:59:59.999 BTC-USDT \n", + "13 2022-06-21 13:59:59.999 BTC-USDC 
\n", + "13 2022-06-21 13:59:59.999 BTC-USDT \n", + "14 2022-06-21 14:59:59.999 BTC-USDT \n", + "14 2022-06-21 14:59:59.999 BTC-USDC \n", + "15 2022-06-21 15:59:59.999 BTC-USDC \n", + "15 2022-06-21 15:59:59.999 BTC-USDT \n", + "16 2022-06-21 16:59:59.999 BTC-USDC \n", + "16 2022-06-21 16:59:59.999 BTC-USDT \n", + "17 2022-06-21 17:59:59.999 BTC-USDC \n", + "17 2022-06-21 17:59:59.999 BTC-USDT \n", + "18 2022-06-21 18:59:59.999 BTC-USDT \n", + "18 2022-06-21 18:59:59.999 BTC-USDC \n", + "19 2022-06-21 19:59:59.999 BTC-USDC \n", + "19 2022-06-21 19:59:59.999 BTC-USDT \n", + "20 2022-06-21 20:59:59.999 BTC-USDC \n", + "20 2022-06-21 20:59:59.999 BTC-USDT \n", + "21 2022-06-21 21:59:59.999 BTC-USDC \n", + "21 2022-06-21 21:59:59.999 BTC-USDT \n", + "22 2022-06-21 22:59:59.999 BTC-USDC \n", + "22 2022-06-21 22:59:59.999 BTC-USDT \n", + "23 2022-06-21 23:59:59.999 BTC-USDT \n", + "23 2022-06-21 23:59:59.999 BTC-USDC " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", "btcusdc_1h_df['pair'] = 'BTC-USDC'\n", "\n", + "# Join datasets\n", "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", "\n", + "# Validate result\n", "assert(\n", " isinstance(candles_1h_df, pd.DataFrame)\n", " and candles_1h_df.shape == (48, 7)\n", " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", - ")" + ")\n", + "\n", + "# Sort output by Close_time\n", + "candles_1h_df.sort_values('Close_time')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Plot something interesting... :bulb:" + "### Step 4: Advanced analytics and visualization\n", + "\n", + "Great! We have a dataset with 2 pairs. Let's think about what interesting things can be found there :bulb: and try to visualize it.\n", + "Feel free to use your favorite framework(s) for visualization." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Enrich dataset using Open FIGI API Interaction" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "ename": "JSONDecodeError", - "evalue": "Expecting value: line 1 column 1 (char 0)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/dictator/apps/resistance/src/candidate.ipynb Cell 24'\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m json_response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mjson()\n\u001b[1;32m 7\u001b[0m \u001b[39mreturn\u001b[39;00m pd\u001b[39m.\u001b[39mDataFrame\u001b[39m.\u001b[39mfrom_dict(json_response[\u001b[39m'\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m'\u001b[39m], orient\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> 9\u001b[0m send_request(\u001b[39m'\u001b[39;49m\u001b[39mBTCUSDT\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", - "\u001b[1;32m/home/dictator/apps/resistance/src/candidate.ipynb Cell 24'\u001b[0m in \u001b[0;36msend_request\u001b[0;34m(ticker)\u001b[0m\n\u001b[1;32m 2\u001b[0m api_url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mhttps://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString=\u001b[39m\u001b[39m{\u001b[39;00mticker\u001b[39m}\u001b[39;00m\u001b[39m&start=0\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 4\u001b[0m response \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mget(api_url)\n\u001b[0;32m----> 5\u001b[0m json_response \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39;49mjson()\n\u001b[1;32m 7\u001b[0m \u001b[39mreturn\u001b[39;00m 
pd\u001b[39m.\u001b[39mDataFrame\u001b[39m.\u001b[39mfrom_dict(json_response[\u001b[39m'\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m'\u001b[39m], orient\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m'\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/httpx/_models.py:1517\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 1515\u001b[0m \u001b[39mif\u001b[39;00m encoding \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m jsonlib\u001b[39m.\u001b[39mloads(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontent\u001b[39m.\u001b[39mdecode(encoding), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m-> 1517\u001b[0m \u001b[39mreturn\u001b[39;00m jsonlib\u001b[39m.\u001b[39;49mloads(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtext, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 341\u001b[0m s \u001b[39m=\u001b[39m s\u001b[39m.\u001b[39mdecode(detect_encoding(s), \u001b[39m'\u001b[39m\u001b[39msurrogatepass\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 343\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_hook \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m parse_float \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m object_pairs_hook 
\u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[39mreturn\u001b[39;00m _default_decoder\u001b[39m.\u001b[39;49mdecode(s)\n\u001b[1;32m 347\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 348\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m JSONDecoder\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecode\u001b[39m(\u001b[39mself\u001b[39m, s, _w\u001b[39m=\u001b[39mWHITESPACE\u001b[39m.\u001b[39mmatch):\n\u001b[1;32m 333\u001b[0m \u001b[39m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[39m containing a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mraw_decode(s, idx\u001b[39m=\u001b[39;49m_w(s, \u001b[39m0\u001b[39;49m)\u001b[39m.\u001b[39;49mend())\n\u001b[1;32m 338\u001b[0m end \u001b[39m=\u001b[39m _w(s, end)\u001b[39m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[39mif\u001b[39;00m end \u001b[39m!=\u001b[39m \u001b[39mlen\u001b[39m(s):\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m obj, end \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m 354\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[39mraise\u001b[39;00m JSONDecodeError(\u001b[39m\"\u001b[39m\u001b[39mExpecting 
value\u001b[39m\u001b[39m\"\u001b[39m, s, err\u001b[39m.\u001b[39mvalue) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39m\n\u001b[1;32m 356\u001b[0m \u001b[39mreturn\u001b[39;00m obj, end\n", - "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" - ] - } - ], - "source": [ - "def send_request(ticker: str) -> pd.DataFrame:\n", - " api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22Curncy%22&num_rows=100&simpleSearchString={ticker}&start=0'\n", + "## Conclusion\n", "\n", - " response = httpx.get(api_url)\n", - " json_response = response.json()\n", - " \n", - " return pd.DataFrame.from_dict(json_response['result'], orient='columns')\n", - "\n", - "send_request('BTCUSDT')" + "Great job! Thanks for your work and ideas. I hope it was an exciting journey!" ] } ], From 5451898a9dfe610c8a3bcbd9cc302cd549697c63 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Sun, 26 Jun 2022 13:18:10 +0000 Subject: [PATCH 10/17] Biance data crawlers (beta) --- src/binance_api_client.py | 29 + src/binance_open_data.ipynb | 299 +++++ src/candidate_tests.ipynb | 2176 ----------------------------------- 3 files changed, 328 insertions(+), 2176 deletions(-) create mode 100644 src/binance_api_client.py create mode 100644 src/binance_open_data.ipynb delete mode 100644 src/candidate_tests.ipynb diff --git a/src/binance_api_client.py b/src/binance_api_client.py new file mode 100644 index 0000000..5863e35 --- /dev/null +++ b/src/binance_api_client.py @@ -0,0 +1,29 @@ +# %% +import numpy as np +import pandas as pd +import time + +from binance.client import Client + + +# %% +api_key = "****" +secret_key = "***" + +client = Client(api_key, secret_key) + + +# %% +coins_response = client.get_all_coins_info() +coins_df = pd.DataFrame.from_dict(coins_response, orient='columns') + + +# %% +pairs_list = coins_df.coin.apply(lambda x: f"{x}USDT") +client.get_historical_klines( + 'BTCUSDT', + interval=Client.KLINE_INTERVAL_1HOUR, + 
start_str='2022-04-21', + end_str='2022-04-22' +) + diff --git a/src/binance_open_data.ipynb b/src/binance_open_data.ipynb new file mode 100644 index 0000000..dfe4396 --- /dev/null +++ b/src/binance_open_data.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Binance Open Data lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n", + "\n", + "### Stet 1. Download data \n", + "\n", + "Downloading __1-minute candles__ for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell` scripts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "#!/bin/sh\n", + "\n", + "# create dir for data\n", + "!mkdir ../data\n", + "\n", + "# download data using GET request\n", + "!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "\n", + "# unzip\n", + "!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n", + "!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Import data to Dataframe \n", + "\n", + "Import packages for data analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import httpx\n", + "\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import data from CSV file to Pandas DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_data(pair: str) -> 
pd.DataFrame:\n", + " return pd.read_csv(f'../data/{pair}-1m-2022-06-21.csv', header = None)\n", + "\n", + "btcusdt_df = get_data('BTCUSDT')\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set names to columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n", + " column_names_mapping = {\n", + " 0: 'Open_time',\n", + " 1: 'Open',\n", + " 2: 'High',\n", + " 3: 'Low',\n", + " 4: 'Close',\n", + " 5: 'Volume',\n", + " 6: 'Close_time',\n", + " 7: 'Quote_asset_volume',\n", + " 8: 'Number_of_trades',\n", + " 9: 'Taker_buy_base_asset_volume',\n", + " 10: 'Taker_buy_quote_asset_volume',\n", + " 11: 'Ignore'\n", + " }\n", + " return df.rename(columns=column_names_mapping)\n", + "\n", + "btcusdt_df = set_column_names(btcusdt_df)\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert timestamp to human-readable date and time format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_df.describe(datetime_is_numeric=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Transform data\n", + "\n", + "Calculate __1-hour OHLCV__ candles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n", + " df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n", + "\n", + " return (\n", + " df\n", + " .groupby(['hour'])\n", + " .agg(\n", + " {\n", + " 'Open': 'first',\n", + " 'High': max,\n", + " 'Low': min,\n", + " 'Close': 'last',\n", + " 'Volume': sum,\n", + " 'Close_time': max\n", + " }\n", + " )\n", + " .reset_index()\n", + " .drop(columns=['hour'])\n", + " )\n", + "\n", + "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", + "\n", + "btcusdt_1h_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data validation is very important. Let's write domain-driven asserts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert(\n", + " isinstance(btcusdt_1h_df, pd.DataFrame)\n", + " and btcusdt_1h_df.shape == (24, 6)\n", + " and not btcusdt_1h_df.isnull().any().any()\n", + " and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Expand the dataset with information about `BTC/USDC` \n", + "\n", + "Download `BTC/USDC` 1-minute candles and transform it to 1-hour candles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdc_df = get_data('BTCUSDC') # download data\n", + "btcusdc_df = set_column_names(btcusdc_df) # set column names\n", + "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n", + "\n", + "btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n", + "btcusdc_1h_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Join altogether:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", + "btcusdc_1h_df['pair'] = 
'BTC-USDC'\n", + "\n", + "# Join datasets\n", + "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", + "\n", + "# Validate result\n", + "assert(\n", + " isinstance(candles_1h_df, pd.DataFrame)\n", + " and candles_1h_df.shape == (48, 7)\n", + " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", + ")\n", + "\n", + "# Sort output by Close_time\n", + "candles_1h_df.sort_values('Close_time')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/candidate_tests.ipynb b/src/candidate_tests.ipynb deleted file mode 100644 index 4b9e15e..0000000 --- a/src/candidate_tests.ipynb +++ /dev/null @@ -1,2176 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Analyst Code Interview\n", - "\n", - "Hi,\n", - "\n", - "It's a simple code interview. I shouldn't take you more than an hour, and it's fun :)\n", - "\n", - "## Part I. Quizzes\n", - "\n", - "If you didn't have `Python badge` on LinkedIn then it's time [to do it](https://www.linkedin.com/skill-assessments/hub/quizzes/)! Please share your results with us. By the way, here's my badge.\n", - "\n", - "![](../docs/li.png)\n", - "\n", - "If you don't have a LinkedIn account or like quizzes, then take another test from [W3 School](https://www.w3schools.com/quiztest/quiztest.asp?qtest=PANDAS).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Part II. 
Coding\n", - "\n", - "Please share the screen with us and let's repair our program together. \n", - "You can use google.com, StackOverflow, or your favorite IDE. Please use a version of Python greater or equals than 3.8\n", - "\n", - "We intend to collect data from [Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n", - "\n", - "### Stet 1. Download data \n", - "\n", - "Downloading __1-minute candles__ for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell` scripts:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mkdir: cannot create directory ‘../data’: File exists\n", - "--2022-06-24 11:22:56-- https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 108.138.94.79, 108.138.94.48, 108.138.94.67, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|108.138.94.79|:443... connected.\n", - "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/BTCUSDT-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", - "\n", - "--2022-06-24 11:22:57-- https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 108.138.94.88, 108.138.94.67, 108.138.94.48, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|108.138.94.88|:443... connected.\n", - "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/BTCUSDC-1m-2022-06-21.zip’ not modified on server. 
Omitting download.\n", - "\n", - "Archive: ../data/BTCUSDT-1m-2022-06-21.zip\n", - " inflating: ../data/BTCUSDT-1m-2022-06-21.csv \n", - "Archive: ../data/BTCUSDC-1m-2022-06-21.zip\n", - " inflating: ../data/BTCUSDC-1m-2022-06-21.csv \n" - ] - } - ], - "source": [ - "#!/bin/sh\n", - "\n", - "# create dir for data\n", - "!mkdir ../data\n", - "\n", - "# download data using GET request\n", - "!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", - "\n", - "# unzip\n", - "!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n", - "!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Import data to Dataframe \n", - "\n", - "Import packages for data analysis:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "import httpx\n", - "\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import data from CSV file to Pandas DataFrame:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01234567891011
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 \\\n", - "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " 6 7 8 9 10 11 \n", - "0 1655769659999 1.455321e+06 1150 37.36821 7.683845e+05 0 \n", - "1 1655769719999 2.430514e+06 1402 61.25760 1.260950e+06 0 \n", - "2 1655769779999 2.686026e+06 1433 55.80573 1.149409e+06 0 \n", - "3 1655769839999 2.128819e+06 1301 64.57346 1.327338e+06 0 \n", - "4 1655769899999 1.717907e+06 1098 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def get_data(pair: str) -> pd.DataFrame:\n", - " return pd.read_csv(f'../data/binance/{pair}-1m-2022-06-21.csv', header = None)\n", - "\n", - "btcusdt_df = get_data('BTCUSDT')\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set names to columns:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low Close Volume \\\n", - "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " Close_time Quote_asset_volume Number_of_trades \\\n", - "0 1655769659999 1.455321e+06 1150 \n", - "1 1655769719999 2.430514e+06 1402 \n", - "2 1655769779999 2.686026e+06 1433 \n", - "3 1655769839999 2.128819e+06 1301 \n", - "4 1655769899999 1.717907e+06 1098 \n", - "\n", - " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", - "0 37.36821 7.683845e+05 0 \n", - "1 61.25760 1.260950e+06 0 \n", - "2 55.80573 1.149409e+06 0 \n", - "3 64.57346 1.327338e+06 0 \n", - "4 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n", - " column_names_mapping = {\n", - " 0: 'Open_time',\n", - " 1: 'Open',\n", - " 2: 'High',\n", - " 3: 'Low',\n", - " 4: 'Close',\n", - " 5: 'Volume',\n", - " 6: 'Close_time',\n", - " 7: 'Quote_asset_volume',\n", - " 8: 'Number_of_trades',\n", - " 9: 'Taker_buy_base_asset_volume',\n", - " 10: 'Taker_buy_quote_asset_volume',\n", - " 11: 'Ignore'\n", - " }\n", - " return df.rename(columns=column_names_mapping)\n", - "\n", - "btcusdt_df = set_column_names(btcusdt_df)\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert timestamp to human-readable date and time format:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
02022-06-21 00:00:0020573.9020590.0020552.1720558.3670.769252022-06-21 00:00:59.9991.455321e+06115037.368217.683845e+050
12022-06-21 00:01:0020558.3520611.2120558.3520606.70118.060322022-06-21 00:01:59.9992.430514e+06140261.257601.260950e+060
22022-06-21 00:02:0020606.6920626.8920552.4020552.40130.428942022-06-21 00:02:59.9992.686026e+06143355.805731.149409e+060
32022-06-21 00:03:0020552.4120585.6920539.0920578.89103.563182022-06-21 00:03:59.9992.128819e+06130164.573461.327338e+060
42022-06-21 00:04:0020578.8920579.9020537.5720554.4683.555092022-06-21 00:04:59.9991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low Close Volume \\\n", - "0 2022-06-21 00:00:00 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 2022-06-21 00:01:00 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 2022-06-21 00:02:00 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 2022-06-21 00:03:00 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 2022-06-21 00:04:00 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " Close_time Quote_asset_volume Number_of_trades \\\n", - "0 2022-06-21 00:00:59.999 1.455321e+06 1150 \n", - "1 2022-06-21 00:01:59.999 2.430514e+06 1402 \n", - "2 2022-06-21 00:02:59.999 2.686026e+06 1433 \n", - "3 2022-06-21 00:03:59.999 2.128819e+06 1301 \n", - "4 2022-06-21 00:04:59.999 1.717907e+06 1098 \n", - "\n", - " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", - "0 37.36821 7.683845e+05 0 \n", - "1 61.25760 1.260950e+06 0 \n", - "2 55.80573 1.149409e+06 0 \n", - "3 64.57346 1.327338e+06 0 \n", - "4 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", - "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", - "\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
count14401440.0000001440.0000001440.0000001440.0000001440.00000014401.440000e+031440.0000001440.0000001.440000e+031440.0
mean2022-06-21 11:59:3021014.35546521033.87731920996.87103521014.45808372.4794352022-06-21 12:00:29.9990003201.526793e+061052.26458336.2452087.635009e+050.0
min2022-06-21 00:00:0020377.84000020396.83000020348.40000020377.8500005.3914202022-06-21 00:00:59.9990001.128460e+05227.0000001.9779604.127662e+040.0
25%2022-06-21 05:59:4520737.96500020763.56750020713.95500020737.97000034.1392272022-06-21 06:00:44.9990000647.153174e+05636.00000015.9056503.315046e+050.0
50%2022-06-21 11:59:3021073.58500021095.07500021052.21000021073.59000052.6157302022-06-21 12:00:29.9990000641.100389e+06859.00000024.9484955.249121e+050.0
75%2022-06-21 17:59:1521241.47000021260.69000021224.11500021241.46750082.0582072022-06-21 18:00:14.9990000641.718243e+061225.25000039.8596558.356794e+050.0
max2022-06-21 23:59:0021691.55000021723.00000021631.91000021691.550000732.1814002022-06-21 23:59:59.9990001.571181e+078776.000000471.9332101.013001e+070.0
stdNaN325.066922325.563767324.575912324.95290871.230479NaN1.511238e+06736.29048041.5594218.819089e+050.0
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low \\\n", - "count 1440 1440.000000 1440.000000 1440.000000 \n", - "mean 2022-06-21 11:59:30 21014.355465 21033.877319 20996.871035 \n", - "min 2022-06-21 00:00:00 20377.840000 20396.830000 20348.400000 \n", - "25% 2022-06-21 05:59:45 20737.965000 20763.567500 20713.955000 \n", - "50% 2022-06-21 11:59:30 21073.585000 21095.075000 21052.210000 \n", - "75% 2022-06-21 17:59:15 21241.470000 21260.690000 21224.115000 \n", - "max 2022-06-21 23:59:00 21691.550000 21723.000000 21631.910000 \n", - "std NaN 325.066922 325.563767 324.575912 \n", - "\n", - " Close Volume Close_time \\\n", - "count 1440.000000 1440.000000 1440 \n", - "mean 21014.458083 72.479435 2022-06-21 12:00:29.999000320 \n", - "min 20377.850000 5.391420 2022-06-21 00:00:59.999000 \n", - "25% 20737.970000 34.139227 2022-06-21 06:00:44.999000064 \n", - "50% 21073.590000 52.615730 2022-06-21 12:00:29.999000064 \n", - "75% 21241.467500 82.058207 2022-06-21 18:00:14.999000064 \n", - "max 21691.550000 732.181400 2022-06-21 23:59:59.999000 \n", - "std 324.952908 71.230479 NaN \n", - "\n", - " Quote_asset_volume Number_of_trades Taker_buy_base_asset_volume \\\n", - "count 1.440000e+03 1440.000000 1440.000000 \n", - "mean 1.526793e+06 1052.264583 36.245208 \n", - "min 1.128460e+05 227.000000 1.977960 \n", - "25% 7.153174e+05 636.000000 15.905650 \n", - "50% 1.100389e+06 859.000000 24.948495 \n", - "75% 1.718243e+06 1225.250000 39.859655 \n", - "max 1.571181e+07 8776.000000 471.933210 \n", - "std 1.511238e+06 736.290480 41.559421 \n", - "\n", - " Taker_buy_quote_asset_volume Ignore \n", - "count 1.440000e+03 1440.0 \n", - "mean 7.635009e+05 0.0 \n", - "min 4.127662e+04 0.0 \n", - "25% 3.315046e+05 0.0 \n", - "50% 5.249121e+05 0.0 \n", - "75% 8.356794e+05 0.0 \n", - "max 1.013001e+07 0.0 \n", - "std 8.819089e+05 0.0 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"btcusdt_df.describe(datetime_is_numeric=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Transform data\n", - "\n", - "Calculate __1-hour OHLCV__ candles:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_time
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume Close_time\n", - "0 20573.90 20705.74 20396.77 20672.30 4235.77392 2022-06-21 00:59:59.999\n", - "1 20672.30 20783.99 20504.89 20670.88 3892.91412 2022-06-21 01:59:59.999\n", - "2 20670.87 20699.93 20348.40 20433.49 2876.88928 2022-06-21 02:59:59.999\n", - "3 20433.49 20665.26 20365.00 20614.04 3306.77018 2022-06-21 03:59:59.999\n", - "4 20614.04 20740.72 20474.41 20656.17 2925.33542 2022-06-21 04:59:59.999\n", - "5 20656.17 21029.93 20621.14 20890.77 6516.65611 2022-06-21 05:59:59.999\n", - "6 20890.77 21202.00 20890.77 21192.08 6114.51662 2022-06-21 06:59:59.999\n", - "7 21192.08 21333.00 20952.46 21120.28 5433.08603 2022-06-21 07:59:59.999\n", - "8 21120.28 21500.01 21051.25 21356.32 6241.36801 2022-06-21 08:59:59.999\n", - "9 21356.32 21470.00 21166.94 21200.00 4961.57496 2022-06-21 09:59:59.999\n", - "10 21200.01 21307.83 21061.54 21299.27 3982.47578 2022-06-21 10:59:59.999\n", - "11 21299.02 21361.03 20911.13 20962.42 4685.42524 2022-06-21 11:59:59.999\n", - "12 20962.42 21163.24 20853.36 21106.20 5596.26303 2022-06-21 12:59:59.999\n", - "13 21106.19 21650.00 20995.64 21619.31 8667.59080 2022-06-21 13:59:59.999\n", - "14 21619.31 21723.00 21427.82 21590.79 6295.02429 2022-06-21 14:59:59.999\n", - "15 21590.79 21604.27 21339.07 21392.19 3809.54622 2022-06-21 15:59:59.999\n", - "16 21392.18 21550.39 21355.77 21401.52 2421.77629 2022-06-21 16:59:59.999\n", - "17 21401.52 21457.82 21195.70 21242.94 3755.82919 2022-06-21 17:59:59.999\n", - "18 21242.93 21256.99 21076.48 21100.01 2820.40675 2022-06-21 18:59:59.999\n", - "19 21100.00 21306.51 20870.01 20888.64 4015.33528 2022-06-21 19:59:59.999\n", - "20 20888.63 20987.38 20666.00 20859.86 4442.87596 2022-06-21 20:59:59.999\n", - "21 20859.86 21054.99 20808.00 20972.91 1813.56236 2022-06-21 21:59:59.999\n", - "22 20972.91 21003.70 20741.03 20897.00 2945.61650 2022-06-21 22:59:59.999\n", - "23 20897.00 20943.17 20551.00 20723.52 2613.77441 2022-06-21 
23:59:59.999" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n", - " df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n", - "\n", - " return (\n", - " df\n", - " .groupby(['hour'])\n", - " .agg(\n", - " {\n", - " 'Open': 'first',\n", - " 'High': max,\n", - " 'Low': min,\n", - " 'Close': 'last',\n", - " 'Volume': sum,\n", - " 'Close_time': max\n", - " }\n", - " )\n", - " .reset_index()\n", - " .drop(columns=['hour'])\n", - " )\n", - "\n", - "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", - "\n", - "btcusdt_1h_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data validation is very important. Let's write domain-driven asserts:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "assert(\n", - " isinstance(btcusdt_1h_df, pd.DataFrame)\n", - " and btcusdt_1h_df.shape == (24, 6)\n", - " and not btcusdt_1h_df.isnull().any().any()\n", - " and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3: Expand the dataset with information about `BTC/USDC` \n", - "\n", - "Download `BTC/USDC` 1-minute candles and transform it to 1-hour candles:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_time
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume Close_time\n", - "0 20549.65 20703.08 20371.75 20647.35 284.73521 2022-06-21 00:59:59.999\n", - "1 20648.06 20771.07 20482.83 20646.88 192.50513 2022-06-21 01:59:59.999\n", - "2 20644.78 20672.98 20331.36 20406.71 195.67023 2022-06-21 02:59:59.999\n", - "3 20402.27 20649.99 20331.48 20585.98 290.39210 2022-06-21 03:59:59.999\n", - "4 20586.63 20721.10 20449.57 20632.33 205.78459 2022-06-21 04:59:59.999\n", - "5 20640.33 20999.00 20594.58 20866.07 412.42477 2022-06-21 05:59:59.999\n", - "6 20873.25 21178.00 20873.25 21169.64 524.25894 2022-06-21 06:59:59.999\n", - "7 21166.58 21300.00 20913.40 21097.44 304.65113 2022-06-21 07:59:59.999\n", - "8 21097.20 21471.91 21023.64 21327.56 366.29201 2022-06-21 08:59:59.999\n", - "9 21342.76 21448.22 21144.24 21182.12 350.77543 2022-06-21 09:59:59.999\n", - "10 21175.75 21298.52 21038.30 21270.98 526.66602 2022-06-21 10:59:59.999\n", - "11 21273.73 21336.94 20887.17 20948.96 579.00200 2022-06-21 11:59:59.999\n", - "12 20936.59 21143.99 20800.00 21079.42 452.07214 2022-06-21 12:59:59.999\n", - "13 21079.41 21629.36 20968.21 21592.69 507.91190 2022-06-21 13:59:59.999\n", - "14 21595.13 21699.98 21394.05 21572.00 445.73978 2022-06-21 14:59:59.999\n", - "15 21571.96 21671.34 21314.94 21370.87 435.99237 2022-06-21 15:59:59.999\n", - "16 21370.08 21533.14 21330.00 21377.52 243.10368 2022-06-21 16:59:59.999\n", - "17 21377.85 21434.58 21168.59 21220.14 291.46137 2022-06-21 17:59:59.999\n", - "18 21220.14 21233.94 21054.72 21074.20 423.05836 2022-06-21 18:59:59.999\n", - "19 21074.81 21279.52 20851.10 20866.39 266.46488 2022-06-21 19:59:59.999\n", - "20 20864.31 20960.98 20645.03 20838.52 330.79569 2022-06-21 20:59:59.999\n", - "21 20838.51 21057.01 20780.29 20958.22 99.09836 2022-06-21 21:59:59.999\n", - "22 20950.07 20975.92 20719.02 20875.37 177.08203 2022-06-21 22:59:59.999\n", - "23 20880.71 20916.85 20527.90 20699.78 173.22797 2022-06-21 23:59:59.999" - ] - }, - 
"execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdc_df = get_data('BTCUSDC') # download data\n", - "btcusdc_df = set_column_names(btcusdc_df) # set column names\n", - "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n", - "\n", - "btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n", - "btcusdc_1h_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join altogether:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_timepair
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999BTC-USDT
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999BTC-USDC
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999BTC-USDC
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999BTC-USDT
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999BTC-USDT
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999BTC-USDC
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999BTC-USDT
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999BTC-USDC
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999BTC-USDT
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999BTC-USDC
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999BTC-USDT
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999BTC-USDC
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999BTC-USDT
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999BTC-USDC
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999BTC-USDT
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999BTC-USDC
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999BTC-USDC
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999BTC-USDT
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999BTC-USDC
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999BTC-USDT
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999BTC-USDC
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999BTC-USDT
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999BTC-USDC
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999BTC-USDT
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999BTC-USDC
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999BTC-USDT
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999BTC-USDC
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999BTC-USDT
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999BTC-USDT
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999BTC-USDC
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999BTC-USDC
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999BTC-USDT
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999BTC-USDC
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999BTC-USDT
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999BTC-USDC
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999BTC-USDT
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999BTC-USDT
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999BTC-USDC
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999BTC-USDC
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999BTC-USDT
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999BTC-USDC
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999BTC-USDT
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999BTC-USDC
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999BTC-USDT
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999BTC-USDC
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999BTC-USDT
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999BTC-USDT
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999BTC-USDC
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume \\\n", - "0 20573.90 20705.74 20396.77 20672.30 4235.77392 \n", - "0 20549.65 20703.08 20371.75 20647.35 284.73521 \n", - "1 20648.06 20771.07 20482.83 20646.88 192.50513 \n", - "1 20672.30 20783.99 20504.89 20670.88 3892.91412 \n", - "2 20670.87 20699.93 20348.40 20433.49 2876.88928 \n", - "2 20644.78 20672.98 20331.36 20406.71 195.67023 \n", - "3 20433.49 20665.26 20365.00 20614.04 3306.77018 \n", - "3 20402.27 20649.99 20331.48 20585.98 290.39210 \n", - "4 20614.04 20740.72 20474.41 20656.17 2925.33542 \n", - "4 20586.63 20721.10 20449.57 20632.33 205.78459 \n", - "5 20656.17 21029.93 20621.14 20890.77 6516.65611 \n", - "5 20640.33 20999.00 20594.58 20866.07 412.42477 \n", - "6 20890.77 21202.00 20890.77 21192.08 6114.51662 \n", - "6 20873.25 21178.00 20873.25 21169.64 524.25894 \n", - "7 21192.08 21333.00 20952.46 21120.28 5433.08603 \n", - "7 21166.58 21300.00 20913.40 21097.44 304.65113 \n", - "8 21097.20 21471.91 21023.64 21327.56 366.29201 \n", - "8 21120.28 21500.01 21051.25 21356.32 6241.36801 \n", - "9 21342.76 21448.22 21144.24 21182.12 350.77543 \n", - "9 21356.32 21470.00 21166.94 21200.00 4961.57496 \n", - "10 21175.75 21298.52 21038.30 21270.98 526.66602 \n", - "10 21200.01 21307.83 21061.54 21299.27 3982.47578 \n", - "11 21273.73 21336.94 20887.17 20948.96 579.00200 \n", - "11 21299.02 21361.03 20911.13 20962.42 4685.42524 \n", - "12 20936.59 21143.99 20800.00 21079.42 452.07214 \n", - "12 20962.42 21163.24 20853.36 21106.20 5596.26303 \n", - "13 21079.41 21629.36 20968.21 21592.69 507.91190 \n", - "13 21106.19 21650.00 20995.64 21619.31 8667.59080 \n", - "14 21619.31 21723.00 21427.82 21590.79 6295.02429 \n", - "14 21595.13 21699.98 21394.05 21572.00 445.73978 \n", - "15 21571.96 21671.34 21314.94 21370.87 435.99237 \n", - "15 21590.79 21604.27 21339.07 21392.19 3809.54622 \n", - "16 21370.08 21533.14 21330.00 21377.52 243.10368 \n", - "16 21392.18 21550.39 21355.77 21401.52 2421.77629 \n", - 
"17 21377.85 21434.58 21168.59 21220.14 291.46137 \n", - "17 21401.52 21457.82 21195.70 21242.94 3755.82919 \n", - "18 21242.93 21256.99 21076.48 21100.01 2820.40675 \n", - "18 21220.14 21233.94 21054.72 21074.20 423.05836 \n", - "19 21074.81 21279.52 20851.10 20866.39 266.46488 \n", - "19 21100.00 21306.51 20870.01 20888.64 4015.33528 \n", - "20 20864.31 20960.98 20645.03 20838.52 330.79569 \n", - "20 20888.63 20987.38 20666.00 20859.86 4442.87596 \n", - "21 20838.51 21057.01 20780.29 20958.22 99.09836 \n", - "21 20859.86 21054.99 20808.00 20972.91 1813.56236 \n", - "22 20950.07 20975.92 20719.02 20875.37 177.08203 \n", - "22 20972.91 21003.70 20741.03 20897.00 2945.61650 \n", - "23 20897.00 20943.17 20551.00 20723.52 2613.77441 \n", - "23 20880.71 20916.85 20527.90 20699.78 173.22797 \n", - "\n", - " Close_time pair \n", - "0 2022-06-21 00:59:59.999 BTC-USDT \n", - "0 2022-06-21 00:59:59.999 BTC-USDC \n", - "1 2022-06-21 01:59:59.999 BTC-USDC \n", - "1 2022-06-21 01:59:59.999 BTC-USDT \n", - "2 2022-06-21 02:59:59.999 BTC-USDT \n", - "2 2022-06-21 02:59:59.999 BTC-USDC \n", - "3 2022-06-21 03:59:59.999 BTC-USDT \n", - "3 2022-06-21 03:59:59.999 BTC-USDC \n", - "4 2022-06-21 04:59:59.999 BTC-USDT \n", - "4 2022-06-21 04:59:59.999 BTC-USDC \n", - "5 2022-06-21 05:59:59.999 BTC-USDT \n", - "5 2022-06-21 05:59:59.999 BTC-USDC \n", - "6 2022-06-21 06:59:59.999 BTC-USDT \n", - "6 2022-06-21 06:59:59.999 BTC-USDC \n", - "7 2022-06-21 07:59:59.999 BTC-USDT \n", - "7 2022-06-21 07:59:59.999 BTC-USDC \n", - "8 2022-06-21 08:59:59.999 BTC-USDC \n", - "8 2022-06-21 08:59:59.999 BTC-USDT \n", - "9 2022-06-21 09:59:59.999 BTC-USDC \n", - "9 2022-06-21 09:59:59.999 BTC-USDT \n", - "10 2022-06-21 10:59:59.999 BTC-USDC \n", - "10 2022-06-21 10:59:59.999 BTC-USDT \n", - "11 2022-06-21 11:59:59.999 BTC-USDC \n", - "11 2022-06-21 11:59:59.999 BTC-USDT \n", - "12 2022-06-21 12:59:59.999 BTC-USDC \n", - "12 2022-06-21 12:59:59.999 BTC-USDT \n", - "13 2022-06-21 13:59:59.999 BTC-USDC 
\n", - "13 2022-06-21 13:59:59.999 BTC-USDT \n", - "14 2022-06-21 14:59:59.999 BTC-USDT \n", - "14 2022-06-21 14:59:59.999 BTC-USDC \n", - "15 2022-06-21 15:59:59.999 BTC-USDC \n", - "15 2022-06-21 15:59:59.999 BTC-USDT \n", - "16 2022-06-21 16:59:59.999 BTC-USDC \n", - "16 2022-06-21 16:59:59.999 BTC-USDT \n", - "17 2022-06-21 17:59:59.999 BTC-USDC \n", - "17 2022-06-21 17:59:59.999 BTC-USDT \n", - "18 2022-06-21 18:59:59.999 BTC-USDT \n", - "18 2022-06-21 18:59:59.999 BTC-USDC \n", - "19 2022-06-21 19:59:59.999 BTC-USDC \n", - "19 2022-06-21 19:59:59.999 BTC-USDT \n", - "20 2022-06-21 20:59:59.999 BTC-USDC \n", - "20 2022-06-21 20:59:59.999 BTC-USDT \n", - "21 2022-06-21 21:59:59.999 BTC-USDC \n", - "21 2022-06-21 21:59:59.999 BTC-USDT \n", - "22 2022-06-21 22:59:59.999 BTC-USDC \n", - "22 2022-06-21 22:59:59.999 BTC-USDT \n", - "23 2022-06-21 23:59:59.999 BTC-USDT \n", - "23 2022-06-21 23:59:59.999 BTC-USDC " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", - "btcusdc_1h_df['pair'] = 'BTC-USDC'\n", - "\n", - "# Join datasets\n", - "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", - "\n", - "# Validate result\n", - "assert(\n", - " isinstance(candles_1h_df, pd.DataFrame)\n", - " and candles_1h_df.shape == (48, 7)\n", - " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", - ")\n", - "\n", - "# Sort output by Close_time\n", - "candles_1h_df.sort_values('Close_time')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 4: Advanced analytics and visualization\n", - "\n", - "Great! We have a dataset with 2 pairs. Let's think about what interesting things can be found there :bulb: and try to visualize it.\n", - "Feel free to use your favorite framework(s) for visualization." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "Great job! 
Thanks for your work and ideas. I hope it was an exciting journey!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.13 ('base')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From c60c8fac55b959f439e7ad170a9a46d06ffb2fd7 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Wed, 6 Jul 2022 14:35:13 +0000 Subject: [PATCH 11/17] Add conn string definition --- src/openfigi_crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openfigi_crawler.py b/src/openfigi_crawler.py index 42fbc4b..2662f7f 100644 --- a/src/openfigi_crawler.py +++ b/src/openfigi_crawler.py @@ -145,6 +145,7 @@ pair_figi_list = [figi_provider.search(p) for p in usd_pairs] # %% ---- +conn_settings = ConnectionSettings(server='****.database.windows.net', database='market-data-db', username='', password='****') db_conn = AzureDbConnection(conn_settings) db_conn.connect() From 138587cd6b6aab7525daba80e2ea7a7a51c1b722 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Mon, 17 Oct 2022 12:40:22 +0000 Subject: [PATCH 12/17] Update SQL driver and minor improvements --- src/azure.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/azure.py b/src/azure.py index 05e6f96..e962acd 100644 --- a/src/azure.py +++ b/src/azure.py @@ -17,7 +17,7 @@ class ConnectionSettings: database: str username: str password: str - driver: str = '{ODBC Driver 17 for SQL Server}' + driver: str = '{ODBC Driver 18 for SQL Server}' timeout: int = 30 @@ -28,10 +28,10 @@ class AzureDbConnection: def __init__(self, conn_settings: 
ConnectionSettings, echo: bool = False) -> None: conn_params = urllib.parse.quote_plus( 'Driver=%s;' % conn_settings.driver + - 'Server=tcp:%s,1433;' % conn_settings.server + + 'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server + 'Database=%s;' % conn_settings.database + 'Uid=%s;' % conn_settings.username + - 'Pwd={%s};' % conn_settings.password + + 'Pwd=%s;' % conn_settings.password + 'Encrypt=yes;' + 'TrustServerCertificate=no;' + 'Connection Timeout=%s;' % conn_settings.timeout From aa7d45380b54111240c35b625b2508070308f455 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Mon, 17 Oct 2022 16:08:16 +0000 Subject: [PATCH 13/17] Remove redundant --- src/binance_api_client.py | 29 ------- src/binance_open_data.ipynb | 40 ++++++++- src/openfigi_crawler.py | 168 ------------------------------------ 3 files changed, 39 insertions(+), 198 deletions(-) delete mode 100644 src/binance_api_client.py delete mode 100644 src/openfigi_crawler.py diff --git a/src/binance_api_client.py b/src/binance_api_client.py deleted file mode 100644 index 5863e35..0000000 --- a/src/binance_api_client.py +++ /dev/null @@ -1,29 +0,0 @@ -# %% -import numpy as np -import pandas as pd -import time - -from binance.client import Client - - -# %% -api_key = "****" -secret_key = "***" - -client = Client(api_key, secret_key) - - -# %% -coins_response = client.get_all_coins_info() -coins_df = pd.DataFrame.from_dict(coins_response, orient='columns') - - -# %% -pairs_list = coins_df.coin.apply(lambda x: f"{x}USDT") -client.get_historical_klines( - 'BTCUSDT', - interval=Client.KLINE_INTERVAL_1HOUR, - start_str='2022-04-21', - end_str='2022-04-22' -) - diff --git a/src/binance_open_data.ipynb b/src/binance_open_data.ipynb index dfe4396..ed8b9e1 100644 --- a/src/binance_open_data.ipynb +++ b/src/binance_open_data.ipynb @@ -267,6 +267,44 @@ "# Sort output by Close_time\n", "candles_1h_df.sort_values('Close_time')" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
(Optional) Use Binance API" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# %%\n", + "import pandas as pd \n", + "from binance.client import Client\n", + "\n", + "\n", + "# %%\n", + "api_key = \"****\"\n", + "secret_key = \"***\"\n", + "\n", + "client = Client(api_key, secret_key)\n", + "\n", + "\n", + "# %%\n", + "coins_response = client.get_all_coins_info()\n", + "coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')\n", + "\n", + "\n", + "# %%\n", + "pairs_list = coins_df.coin.apply(lambda x: f\"{x}USDT\") \n", + "client.get_historical_klines(\n", + " 'BTCUSDT', \n", + " interval=Client.KLINE_INTERVAL_1HOUR,\n", + " start_str='2022-04-21', \n", + " end_str='2022-04-22'\n", + ")" + ] } ], "metadata": { @@ -285,7 +323,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "orig_nbformat": 4, "vscode": { diff --git a/src/openfigi_crawler.py b/src/openfigi_crawler.py deleted file mode 100644 index 2662f7f..0000000 --- a/src/openfigi_crawler.py +++ /dev/null @@ -1,168 +0,0 @@ - -# %% Import dependencies -import os -from dataclasses import dataclass -from typing import Dict, Union - -import pandas as pd - -import httpx - -from sqlalchemy import types -from azure import AzureDbConnection, ConnectionSettings - - -# %% Data models -@dataclass -class AssetInfo: - FIGI: str - Ticker: str - Title: Union[str, None] - Description: Union[str, None] - AssetType: str = 'Cryptocurrency' - SourceId: str = 'OpenFigi API' - Version: str = 'v202206' - - def as_dict(self) -> Dict[str, str]: - return {'Figi': self.FIGI, 'Ticker': self.Ticker} - - -# %% FIGI provider -class OpenFigiProvider: - """ - OpenFigi API provider - - References: - https://www.openfigi.com/assets/local/figi-allocation-rules.pdf - https://www.openfigi.com/search - """ - @staticmethod - def _send_request(ticker: str, asset_type: str) -> pd.DataFrame: - api_url = 
f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22{asset_type}%22&num_rows=100&simpleSearchString={ticker}&start=0' - response = httpx.get(api_url) - - json_response = response.json() - return pd.DataFrame.from_dict(json_response['result'], orient='columns') - - - @staticmethod - def _find_figi(df: pd.DataFrame, field_name: str) -> Union[str, None]: - if len(df) == 0 or field_name not in df.columns: - return None - - result = df[field_name].dropna().unique() - - if (len(result) != 1): - print(f'[WARN] Multiple ({len(result)}) FIGI records was found') - return None - - return result[0] - - - @staticmethod - def _find_name(df: pd.DataFrame) -> Union[str, None]: - if len(df) == 0 or 'DS002_sd' not in df.columns: - return None - - result = df['DS002_sd'].dropna().unique() - - if (len(result) != 1): - print(f'[WARN] Multiple ({len(result)}) name records was found') - return None - - return result[0] - - - def search(self, ticker: str, asset_type: str = 'Curncy') -> Union[AssetInfo, None]: - """Return FIGI for pair""" - - response_df = OpenFigiProvider._send_request(ticker, asset_type) - - figi = OpenFigiProvider._find_figi(response_df, 'kkg_pairFIGI_sd') - - if figi is None: - base_quote = ticker.split('-')[0] - print(f'[INFO] {ticker} > Try to search using base quote {base_quote}') - - response_df = OpenFigiProvider._send_request(base_quote, asset_type) - figi = OpenFigiProvider._find_figi(response_df, 'kkg_baseAssetFigi_sd') - - if figi is None: - return None - - return AssetInfo(figi, ticker, None, None) - - -#%% -figi_provider = OpenFigiProvider() - -assert figi_provider.search('WAX-USD') == None -assert figi_provider.search('ABCD') == None - - -# %% Tests -expected_pairs = { - 'BNB-USD': 'KKG000007HZ5', - 'ETH-USD': 'BBG00J3NBWD7', - 'BTC-USD': 'BBG006FCL7J4', - 'SOL-USD': 'BBG013WVY457', - 'UNI-USD': 'BBG013TZFVW3', - 'SUSHI-USD': 'KKG0000010W1', - 'AVAX-USD': 'KKG000007J36' -} - - -for k, v in expected_pairs.items(): - actual = 
figi_provider.search(k) - print(actual.as_dict()) - assert ( - isinstance(actual, AssetInfo) - and actual.FIGI == v - and actual.Ticker == k - ) - - -# %% Get assets for searching figi -pair_names = [x[:-4] for x in os.listdir("../data")] - -def insert_dash(text: str, position: int) -> str: - if '-' not in text: - return text[:position] + '-' + text[position:] - else: - return text - -usd_pairs = [ - insert_dash(s.upper(), 3) - for s in pair_names if "usd" in s -] - -print(usd_pairs[1:10]) - - -# %% -figi_provider = OpenFigiProvider() -pair_figi_list = [figi_provider.search(p) for p in usd_pairs] - - -# %% ---- -conn_settings = ConnectionSettings(server='****.database.windows.net', database='market-data-db', username='', password='****') -db_conn = AzureDbConnection(conn_settings) - -db_conn.connect() -for t in db_conn.get_tables(): - print(t) - - -# %% -db_mapping = { - 'Figi': types.CHAR(length=12), - 'Ticker': types.VARCHAR(length=12) -} - -figi_df = pd.DataFrame([t.as_dict() for t in pair_figi_list if isinstance(t, AssetInfo)]) -db_conn.insert(figi_df, 'figi', db_mapping) - - -# %% -db_conn.dispose() -print('Completed') From 3882efc9a23fabd95dfd73fccf95f384d9366927 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Mon, 17 Oct 2022 17:02:23 +0000 Subject: [PATCH 14/17] Update parser --- src/bitfinex_crypto_parser.py | 103 ++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py index 313cc3b..20a386b 100644 --- a/src/bitfinex_crypto_parser.py +++ b/src/bitfinex_crypto_parser.py @@ -1,42 +1,41 @@ #!/usr/bin/python3 """ -Data source: https://www.kaggle.com/code/tencars/bitfinexdataset +Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution """ # %% import os + import numpy as np import pandas as pd from sqlalchemy import types from azure import AzureDbConnection, ConnectionSettings - # %% -#> ~/apps/resistance/data +# In 
terminal: +#> kaggle -v # must be >1.15 +#> mkdir data; cd data #> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution #> unzip 392-crypto-currency-pairs-at-minute-resolution.zip -input_path = "../data" +input_dir = "../data" # Get names and number of available currency pairs -pair_names = [x[:-4] for x in os.listdir(input_path)] -n_pairs = len(pair_names) +pair_names = [x[:-4] for x in os.listdir(input_dir)] +usd_pairs = [s for s in pair_names if "usd" in s] # Print the first 50 currency pair names -print("These are the first 50 out of {} currency pairs in the dataset:".format(n_pairs)) -print(pair_names[0:50]) - -usd_pairs = [s for s in pair_names if "usd" in s] -print(usd_pairs) +print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:") +print(usd_pairs[0:10]) # %% -def load_data(symbol, source=input_path): - path_name = source + "/" + symbol + ".csv" +def load_data(symbol: str, input_dir: str) -> pd.DataFrame: + path_name = input_dir + "/" + symbol + ".csv" # Load data df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64}) @@ -50,23 +49,50 @@ def load_data(symbol, source=input_path): return df[['open', 'high', 'low', 'close', 'volume']] +def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame: + df['hour'] = df.index.to_period('H') + + return ( + df + .groupby(['hour']) + .agg( + { + 'open': 'first', + 'high': max, + 'low': min, + 'close': 'last', + 'volume': sum, + #'time': max + } + ) + .reset_index() + ) + + # %% ---- -sample_df = load_data("ethusd") -sample_df +ethusd_1m = load_data("ethusd", input_dir) +ethusd_1h = calc_ohlcv_1h(ethusd_1m) + +ethusd_1h.tail() # %% ---- -db_conn = AzureDbConnection(conn_settings) +conn_settings = ConnectionSettings( + 'datainstinct', + 'market-data-db', + 'demo', + '0test_test_AND_test' +) +db_conn = AzureDbConnection(conn_settings) db_conn.connect() + for t in 
db_conn.get_tables(): print(t) # %% -min_candels_n = 10000 - db_mapping = { 'FIGI': types.CHAR(length=12), 'open': types.DECIMAL(precision=19, scale=9), @@ -75,28 +101,43 @@ db_mapping = { 'low': types.DECIMAL(precision=19, scale=9), 'volume': types.DECIMAL(precision=19, scale=9), 'time': types.DATETIME(), - 'source_id': types.SMALLINT, + 'source_id': types.SMALLINT(), 'version': types.VARCHAR(length=12), 'interval': types.CHAR(length=2) } + +# %% +pd.options.mode.chained_assignment = None + +min_candels_n = 10000 + for pair in usd_pairs: - print(f'Starting read {pair}...') - candles_df = load_data(pair) + print(f'INFO | {pair} > Starting read dataset...') - candles_df['FIGI'] = pair - candles_df['time'] = candles_df.index - candles_df['source_id'] = 128 - candles_df['version'] = 'v202206' - candles_df['interval'] = '1M' + candles_df = load_data(pair, input_dir) - if candles_df.shape[0] > min_candels_n: - print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time']))) + if len(candles_df) > min_candels_n: - print(f'Starting insert {pair}...') - db_conn.insert(candles_df, 'crypto', db_mapping) + df = candles_df.loc['2022-07-01':'2022-10-01'] + + if len(df) > 0: + df = calc_ohlcv_1h(df) + + df['FIGI'] = pair + df['time'] = df.hour.apply(lambda h: h.to_timestamp()) + df['source_id'] = 1 + df['version'] = 'v20221001' + df['interval'] = '1H' + df.drop(columns='hour', inplace=True) + + print(f'INFO | {pair} > Starting insert to DB...') + print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time']))) + db_conn.insert(df, 'crypto', db_mapping) + else: + print(f'WARN | {pair} > No new records') else: - print(f'WARN: {pair} has only {candles_df.shape[0]} records') + print(f'WARN | {pair} > Only {candles_df.shape[0]} records') # %% From 28450cdeab937f6f38358e10f1256fbae97ce6f2 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Mon, 17 Oct 2022 22:44:14 +0000 Subject: [PATCH 15/17] Processing exceptions 
--- src/bitfinex_crypto_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py index 20a386b..2084877 100644 --- a/src/bitfinex_crypto_parser.py +++ b/src/bitfinex_crypto_parser.py @@ -133,7 +133,11 @@ for pair in usd_pairs: print(f'INFO | {pair} > Starting insert to DB...') print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time']))) - db_conn.insert(df, 'crypto', db_mapping) + try: + db_conn.insert(df, 'crypto', db_mapping) + except Exception as ex: + print(f'ERROR | {pair} > {ex}') + else: print(f'WARN | {pair} > No new records') else: From 5c6ea177ddc3e1c1d15f2d1e021773b902a24516 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Tue, 18 Oct 2022 15:55:03 +0000 Subject: [PATCH 16/17] Minor changes --- src/azure.py | 2 +- src/bitfinex_crypto_parser.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/azure.py b/src/azure.py index e962acd..cdfcf77 100644 --- a/src/azure.py +++ b/src/azure.py @@ -54,7 +54,7 @@ class AzureDbConnection: con=self._db, schema='dbo', name=target_table, - if_exists='replace', # or append + if_exists='append', # or replace index=False, chunksize=chunksize, dtype=db_mapping diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py index 2084877..86c9f61 100644 --- a/src/bitfinex_crypto_parser.py +++ b/src/bitfinex_crypto_parser.py @@ -21,7 +21,7 @@ from azure import AzureDbConnection, ConnectionSettings #> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution #> unzip 392-crypto-currency-pairs-at-minute-resolution.zip -input_dir = "../data" +input_dir: str = "../data" # Get names and number of available currency pairs pair_names = [x[:-4] for x in os.listdir(input_dir)] @@ -79,10 +79,10 @@ ethusd_1h.tail() # %% ---- conn_settings = ConnectionSettings( - 'datainstinct', - 'market-data-db', - 'demo', - '0test_test_AND_test' + '', + '', + '', + 
'****' ) db_conn = AzureDbConnection(conn_settings) @@ -112,6 +112,7 @@ pd.options.mode.chained_assignment = None min_candels_n = 10000 +i = 1 for pair in usd_pairs: print(f'INFO | {pair} > Starting read dataset...') @@ -119,7 +120,7 @@ for pair in usd_pairs: if len(candles_df) > min_candels_n: - df = candles_df.loc['2022-07-01':'2022-10-01'] + df = candles_df.loc[:'2022-10-01'] if len(df) > 0: df = calc_ohlcv_1h(df) @@ -131,10 +132,10 @@ for pair in usd_pairs: df['interval'] = '1H' df.drop(columns='hour', inplace=True) - print(f'INFO | {pair} > Starting insert to DB...') + print(f'INFO | {pair} > Starting insert to DB ({i} of {len(usd_pairs)})...') print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time']))) try: - db_conn.insert(df, 'crypto', db_mapping) + db_conn.insert(df, 'crypto_1h', db_mapping) except Exception as ex: print(f'ERROR | {pair} > {ex}') @@ -142,6 +143,7 @@ for pair in usd_pairs: print(f'WARN | {pair} > No new records') else: print(f'WARN | {pair} > Only {candles_df.shape[0]} records') + i += 1 # %% From 37ec6bef9388b3e6c60783cc964cc4de00a4fed4 Mon Sep 17 00:00:00 2001 From: codez0mb1e Date: Sat, 22 Oct 2022 09:25:59 +0000 Subject: [PATCH 17/17] Remove redundant --- src/azure.py | 66 --------------- src/bitfinex_crypto_parser.py | 150 ---------------------------------- 2 files changed, 216 deletions(-) delete mode 100644 src/azure.py delete mode 100644 src/bitfinex_crypto_parser.py diff --git a/src/azure.py b/src/azure.py deleted file mode 100644 index cdfcf77..0000000 --- a/src/azure.py +++ /dev/null @@ -1,66 +0,0 @@ - -# %% Import dependencies ---- -from dataclasses import dataclass -from typing import Dict, Any - -from sqlalchemy import create_engine, inspect - -import pandas as pd -import urllib - - -# %% Models -@dataclass(frozen=True) -class ConnectionSettings: - """Connection Settings""" - server: str - database: str - username: str - password: str - driver: str = '{ODBC Driver 18 for SQL Server}' - timeout: 
int = 30 - - -# %% Connection -class AzureDbConnection: - """Azure SQL database connection.""" - - def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None: - conn_params = urllib.parse.quote_plus( - 'Driver=%s;' % conn_settings.driver + - 'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server + - 'Database=%s;' % conn_settings.database + - 'Uid=%s;' % conn_settings.username + - 'Pwd=%s;' % conn_settings.password + - 'Encrypt=yes;' + - 'TrustServerCertificate=no;' + - 'Connection Timeout=%s;' % conn_settings.timeout - ) - conn_string = f'mssql+pyodbc:///?odbc_connect={conn_params}' - - self._db = create_engine(conn_string, echo=echo) - - def connect(self) -> None: - """Estimate connection""" - self._conn = self._db.connect() - - def get_tables(self) -> list[str]: - """Get list of tables""" - inspector = inspect(self._db) - return [t for t in inspector.get_table_names()] - - def insert(self, inserted_data: pd.DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None: - inserted_data.to_sql( - con=self._db, - schema='dbo', - name=target_table, - if_exists='append', # or replace - index=False, - chunksize=chunksize, - dtype=db_mapping - ) - - def dispose(self) -> None: - """Dispose opened connections""" - self._conn.close() - self._db.dispose() diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py deleted file mode 100644 index 86c9f61..0000000 --- a/src/bitfinex_crypto_parser.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/python3 - -""" -Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution -""" - -# %% -import os - -import numpy as np -import pandas as pd -from sqlalchemy import types - -from azure import AzureDbConnection, ConnectionSettings - -# %% - -# In terminal: -#> kaggle -v # must be >1.15 -#> mkdir data; cd data -#> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution -#> unzip 
392-crypto-currency-pairs-at-minute-resolution.zip - -input_dir: str = "../data" - -# Get names and number of available currency pairs -pair_names = [x[:-4] for x in os.listdir(input_dir)] -usd_pairs = [s for s in pair_names if "usd" in s] - -# Print the first 50 currency pair names -print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:") -print(usd_pairs[0:10]) - - -# %% - -def load_data(symbol: str, input_dir: str) -> pd.DataFrame: - path_name = input_dir + "/" + symbol + ".csv" - - # Load data - df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64}) - df.index = pd.to_datetime(df.index, unit='ms') - df = df[~df.index.duplicated(keep='first')] - - # As mentioned in the description, bins without any change are not recorded. - # We have to fill these gaps by filling them with the last value until a change occurs. - #df = df.resample('1T').pad() - - return df[['open', 'high', 'low', 'close', 'volume']] - - -def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame: - df['hour'] = df.index.to_period('H') - - return ( - df - .groupby(['hour']) - .agg( - { - 'open': 'first', - 'high': max, - 'low': min, - 'close': 'last', - 'volume': sum, - #'time': max - } - ) - .reset_index() - ) - - -# %% ---- -ethusd_1m = load_data("ethusd", input_dir) -ethusd_1h = calc_ohlcv_1h(ethusd_1m) - -ethusd_1h.tail() - - - -# %% ---- -conn_settings = ConnectionSettings( - '', - '', - '', - '****' -) - -db_conn = AzureDbConnection(conn_settings) -db_conn.connect() - -for t in db_conn.get_tables(): - print(t) - - -# %% -db_mapping = { - 'FIGI': types.CHAR(length=12), - 'open': types.DECIMAL(precision=19, scale=9), - 'high': types.DECIMAL(precision=19, scale=9), - 'close': types.DECIMAL(precision=19, scale=9), - 'low': types.DECIMAL(precision=19, scale=9), - 'volume': types.DECIMAL(precision=19, scale=9), - 'time': types.DATETIME(), - 'source_id': types.SMALLINT(), - 
'version': types.VARCHAR(length=12), - 'interval': types.CHAR(length=2) -} - - -# %% -pd.options.mode.chained_assignment = None - -min_candels_n = 10000 - -i = 1 -for pair in usd_pairs: - print(f'INFO | {pair} > Starting read dataset...') - - candles_df = load_data(pair, input_dir) - - if len(candles_df) > min_candels_n: - - df = candles_df.loc[:'2022-10-01'] - - if len(df) > 0: - df = calc_ohlcv_1h(df) - - df['FIGI'] = pair - df['time'] = df.hour.apply(lambda h: h.to_timestamp()) - df['source_id'] = 1 - df['version'] = 'v20221001' - df['interval'] = '1H' - df.drop(columns='hour', inplace=True) - - print(f'INFO | {pair} > Starting insert to DB ({i} of {len(usd_pairs)})...') - print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time']))) - try: - db_conn.insert(df, 'crypto_1h', db_mapping) - except Exception as ex: - print(f'ERROR | {pair} > {ex}') - - else: - print(f'WARN | {pair} > No new records') - else: - print(f'WARN | {pair} > Only {candles_df.shape[0]} records') - i += 1 - - -# %% -db_conn.dispose()