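Parser update: document dataset download steps, switch sample pair to ethusd, store FIGI as CHAR(12), bump version to v202206, insert into the 'crypto' table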

This commit is contained in:
codez0mb1e 2022-06-19 22:59:58 +00:00
parent a3a48fbcba
commit 153fc5a230

View File

@ -1,7 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
""" """
Data source: https://www.kaggle.com/code/tencars/bitfinexdataset Data source: https://www.kaggle.com/code/tencars/bitfinexdataset
""" """
@ -15,6 +14,11 @@ from azure import AzureDbConnection, ConnectionSettings
# %% # %%
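# Shell steps to fetch the dataset before running this script
# (the first line is presumably the working directory to run them from - confirm):
#> ~/apps/resistance/data
#> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution
#> unzip 392-crypto-currency-pairs-at-minute-resolution.zip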
input_path = "../data" input_path = "../data"
# Get names and number of available currency pairs # Get names and number of available currency pairs
@ -28,6 +32,7 @@ print(pair_names[0:50])
usd_pairs = [s for s in pair_names if "usd" in s] usd_pairs = [s for s in pair_names if "usd" in s]
print(usd_pairs) print(usd_pairs)
# %% # %%
def load_data(symbol, source=input_path): def load_data(symbol, source=input_path):
@ -46,12 +51,12 @@ def load_data(symbol, source=input_path):
# %% ---- # %% ----
solusd = load_data("solusd") sample_df = load_data("ethusd")
solusd.tail() sample_df
# %% ---- # %% ----
conn_settings = ConnectionSettings(server='***.database.windows.net', database='market-data-db', username='demo', password='***')
db_conn = AzureDbConnection(conn_settings) db_conn = AzureDbConnection(conn_settings)
db_conn.connect() db_conn.connect()
@ -63,7 +68,7 @@ for t in db_conn.get_tables():
min_candels_n = 10000 min_candels_n = 10000
db_mapping = { db_mapping = {
'FIGI': types.VARCHAR(length=12), 'FIGI': types.CHAR(length=12),
'open': types.DECIMAL(precision=19, scale=9), 'open': types.DECIMAL(precision=19, scale=9),
'high': types.DECIMAL(precision=19, scale=9), 'high': types.DECIMAL(precision=19, scale=9),
'close': types.DECIMAL(precision=19, scale=9), 'close': types.DECIMAL(precision=19, scale=9),
@ -82,14 +87,14 @@ for pair in usd_pairs:
candles_df['FIGI'] = pair candles_df['FIGI'] = pair
candles_df['time'] = candles_df.index candles_df['time'] = candles_df.index
candles_df['source_id'] = 128 candles_df['source_id'] = 128
candles_df['version'] = 'v202204' candles_df['version'] = 'v202206'
candles_df['interval'] = '1M' candles_df['interval'] = '1M'
if candles_df.shape[0] > min_candels_n: if candles_df.shape[0] > min_candels_n:
print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time']))) print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time'])))
print(f'Starting insert {pair}...') print(f'Starting insert {pair}...')
db_conn.insert(candles_df, 'Cryptocurrency', db_mapping) db_conn.insert(candles_df, 'crypto', db_mapping)
else: else:
print(f'WARN: {pair} has only {candles_df.shape[0]} records') print(f'WARN: {pair} has only {candles_df.shape[0]} records')