Compare commits

..

4 Commits

Author SHA1 Message Date
codez0mb1e
28450cdeab Processing exceptions 2022-10-17 22:44:14 +00:00
codez0mb1e
3882efc9a2 Update parser 2022-10-17 17:02:23 +00:00
codez0mb1e
aa7d45380b Remove redundant 2022-10-17 16:08:16 +00:00
codez0mb1e
138587cd6b Update SQL driver and minor improvements 2022-10-17 12:40:22 +00:00
5 changed files with 118 additions and 232 deletions

View File

@ -17,7 +17,7 @@ class ConnectionSettings:
database: str database: str
username: str username: str
password: str password: str
driver: str = '{ODBC Driver 17 for SQL Server}' driver: str = '{ODBC Driver 18 for SQL Server}'
timeout: int = 30 timeout: int = 30
@ -28,10 +28,10 @@ class AzureDbConnection:
def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None: def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None:
conn_params = urllib.parse.quote_plus( conn_params = urllib.parse.quote_plus(
'Driver=%s;' % conn_settings.driver + 'Driver=%s;' % conn_settings.driver +
'Server=tcp:%s,1433;' % conn_settings.server + 'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server +
'Database=%s;' % conn_settings.database + 'Database=%s;' % conn_settings.database +
'Uid=%s;' % conn_settings.username + 'Uid=%s;' % conn_settings.username +
'Pwd={%s};' % conn_settings.password + 'Pwd=%s;' % conn_settings.password +
'Encrypt=yes;' + 'Encrypt=yes;' +
'TrustServerCertificate=no;' + 'TrustServerCertificate=no;' +
'Connection Timeout=%s;' % conn_settings.timeout 'Connection Timeout=%s;' % conn_settings.timeout

View File

@ -1,29 +0,0 @@
# %%
import numpy as np
import pandas as pd
import time
from binance.client import Client
# %%
api_key = "****"
secret_key = "***"
client = Client(api_key, secret_key)
# %%
coins_response = client.get_all_coins_info()
coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')
# %%
pairs_list = coins_df.coin.apply(lambda x: f"{x}USDT")
client.get_historical_klines(
'BTCUSDT',
interval=Client.KLINE_INTERVAL_1HOUR,
start_str='2022-04-21',
end_str='2022-04-22'
)

View File

@ -267,6 +267,44 @@
"# Sort output by Close_time\n", "# Sort output by Close_time\n",
"candles_1h_df.sort_values('Close_time')" "candles_1h_df.sort_values('Close_time')"
] ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### (Optional) Use Binance API"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# %%\n",
"import pandas as pd \n",
"from binance.client import Client\n",
"\n",
"\n",
"# %%\n",
"api_key = \"****\"\n",
"secret_key = \"***\"\n",
"\n",
"client = Client(api_key, secret_key)\n",
"\n",
"\n",
"# %%\n",
"coins_response = client.get_all_coins_info()\n",
"coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')\n",
"\n",
"\n",
"# %%\n",
"pairs_list = coins_df.coin.apply(lambda x: f\"{x}USDT\") \n",
"client.get_historical_klines(\n",
" 'BTCUSDT', \n",
" interval=Client.KLINE_INTERVAL_1HOUR,\n",
" start_str='2022-04-21', \n",
" end_str='2022-04-22'\n",
")"
]
} }
], ],
"metadata": { "metadata": {
@ -285,7 +323,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.13" "version": "3.9.12"
}, },
"orig_nbformat": 4, "orig_nbformat": 4,
"vscode": { "vscode": {

View File

@ -1,42 +1,41 @@
#!/usr/bin/python3 #!/usr/bin/python3
""" """
Data source: https://www.kaggle.com/code/tencars/bitfinexdataset Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution
""" """
# %% # %%
import os import os
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from sqlalchemy import types from sqlalchemy import types
from azure import AzureDbConnection, ConnectionSettings from azure import AzureDbConnection, ConnectionSettings
# %% # %%
#> ~/apps/resistance/data # In terminal:
#> kaggle -v # must be >1.15
#> mkdir data; cd data
#> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution #> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution
#> unzip 392-crypto-currency-pairs-at-minute-resolution.zip #> unzip 392-crypto-currency-pairs-at-minute-resolution.zip
input_path = "../data" input_dir = "../data"
# Get names and number of available currency pairs # Get names and number of available currency pairs
pair_names = [x[:-4] for x in os.listdir(input_path)] pair_names = [x[:-4] for x in os.listdir(input_dir)]
n_pairs = len(pair_names) usd_pairs = [s for s in pair_names if "usd" in s]
# Print the first 50 currency pair names # Print the first 50 currency pair names
print("These are the first 50 out of {} currency pairs in the dataset:".format(n_pairs)) print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:")
print(pair_names[0:50]) print(usd_pairs[0:10])
usd_pairs = [s for s in pair_names if "usd" in s]
print(usd_pairs)
# %% # %%
def load_data(symbol, source=input_path): def load_data(symbol: str, input_dir: str) -> pd.DataFrame:
path_name = source + "/" + symbol + ".csv" path_name = input_dir + "/" + symbol + ".csv"
# Load data # Load data
df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64}) df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64})
@ -50,23 +49,50 @@ def load_data(symbol, source=input_path):
return df[['open', 'high', 'low', 'close', 'volume']] return df[['open', 'high', 'low', 'close', 'volume']]
def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame:
df['hour'] = df.index.to_period('H')
return (
df
.groupby(['hour'])
.agg(
{
'open': 'first',
'high': max,
'low': min,
'close': 'last',
'volume': sum,
#'time': max
}
)
.reset_index()
)
# %% ---- # %% ----
sample_df = load_data("ethusd") ethusd_1m = load_data("ethusd", input_dir)
sample_df ethusd_1h = calc_ohlcv_1h(ethusd_1m)
ethusd_1h.tail()
# %% ---- # %% ----
db_conn = AzureDbConnection(conn_settings) conn_settings = ConnectionSettings(
'datainstinct',
'market-data-db',
'demo',
'0test_test_AND_test'
)
db_conn = AzureDbConnection(conn_settings)
db_conn.connect() db_conn.connect()
for t in db_conn.get_tables(): for t in db_conn.get_tables():
print(t) print(t)
# %% # %%
min_candels_n = 10000
db_mapping = { db_mapping = {
'FIGI': types.CHAR(length=12), 'FIGI': types.CHAR(length=12),
'open': types.DECIMAL(precision=19, scale=9), 'open': types.DECIMAL(precision=19, scale=9),
@ -75,28 +101,47 @@ db_mapping = {
'low': types.DECIMAL(precision=19, scale=9), 'low': types.DECIMAL(precision=19, scale=9),
'volume': types.DECIMAL(precision=19, scale=9), 'volume': types.DECIMAL(precision=19, scale=9),
'time': types.DATETIME(), 'time': types.DATETIME(),
'source_id': types.SMALLINT, 'source_id': types.SMALLINT(),
'version': types.VARCHAR(length=12), 'version': types.VARCHAR(length=12),
'interval': types.CHAR(length=2) 'interval': types.CHAR(length=2)
} }
# %%
pd.options.mode.chained_assignment = None
min_candels_n = 10000
for pair in usd_pairs: for pair in usd_pairs:
print(f'Starting read {pair}...') print(f'INFO | {pair} > Starting read dataset...')
candles_df = load_data(pair)
candles_df['FIGI'] = pair candles_df = load_data(pair, input_dir)
candles_df['time'] = candles_df.index
candles_df['source_id'] = 128
candles_df['version'] = 'v202206'
candles_df['interval'] = '1M'
if candles_df.shape[0] > min_candels_n: if len(candles_df) > min_candels_n:
print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time'])))
print(f'Starting insert {pair}...') df = candles_df.loc['2022-07-01':'2022-10-01']
db_conn.insert(candles_df, 'crypto', db_mapping)
if len(df) > 0:
df = calc_ohlcv_1h(df)
df['FIGI'] = pair
df['time'] = df.hour.apply(lambda h: h.to_timestamp())
df['source_id'] = 1
df['version'] = 'v20221001'
df['interval'] = '1H'
df.drop(columns='hour', inplace=True)
print(f'INFO | {pair} > Starting insert to DB...')
print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time'])))
try:
db_conn.insert(df, 'crypto', db_mapping)
except Exception as ex:
print(f'ERROR | {pair} > {ex}')
else:
print(f'WARN | {pair} > No new records')
else: else:
print(f'WARN: {pair} has only {candles_df.shape[0]} records') print(f'WARN | {pair} > Only {candles_df.shape[0]} records')
# %% # %%

View File

@ -1,168 +0,0 @@
# %% Import dependencies
import os
from dataclasses import dataclass
from typing import Dict, Union
import pandas as pd
import httpx
from sqlalchemy import types
from azure import AzureDbConnection, ConnectionSettings
# %% Data models
@dataclass
class AssetInfo:
FIGI: str
Ticker: str
Title: Union[str, None]
Description: Union[str, None]
AssetType: str = 'Cryptocurrency'
SourceId: str = 'OpenFigi API'
Version: str = 'v202206'
def as_dict(self) -> Dict[str, str]:
return {'Figi': self.FIGI, 'Ticker': self.Ticker}
# %% FIGI provider
class OpenFigiProvider:
"""
OpenFigi API provider
References:
https://www.openfigi.com/assets/local/figi-allocation-rules.pdf
https://www.openfigi.com/search
"""
@staticmethod
def _send_request(ticker: str, asset_type: str) -> pd.DataFrame:
api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22{asset_type}%22&num_rows=100&simpleSearchString={ticker}&start=0'
response = httpx.get(api_url)
json_response = response.json()
return pd.DataFrame.from_dict(json_response['result'], orient='columns')
@staticmethod
def _find_figi(df: pd.DataFrame, field_name: str) -> Union[str, None]:
if len(df) == 0 or field_name not in df.columns:
return None
result = df[field_name].dropna().unique()
if (len(result) != 1):
print(f'[WARN] Multiple ({len(result)}) FIGI records was found')
return None
return result[0]
@staticmethod
def _find_name(df: pd.DataFrame) -> Union[str, None]:
if len(df) == 0 or 'DS002_sd' not in df.columns:
return None
result = df['DS002_sd'].dropna().unique()
if (len(result) != 1):
print(f'[WARN] Multiple ({len(result)}) name records was found')
return None
return result[0]
def search(self, ticker: str, asset_type: str = 'Curncy') -> Union[AssetInfo, None]:
"""Return FIGI for pair"""
response_df = OpenFigiProvider._send_request(ticker, asset_type)
figi = OpenFigiProvider._find_figi(response_df, 'kkg_pairFIGI_sd')
if figi is None:
base_quote = ticker.split('-')[0]
print(f'[INFO] {ticker} > Try to search using base quote {base_quote}')
response_df = OpenFigiProvider._send_request(base_quote, asset_type)
figi = OpenFigiProvider._find_figi(response_df, 'kkg_baseAssetFigi_sd')
if figi is None:
return None
return AssetInfo(figi, ticker, None, None)
#%%
figi_provider = OpenFigiProvider()
assert figi_provider.search('WAX-USD') == None
assert figi_provider.search('ABCD') == None
# %% Tests
expected_pairs = {
'BNB-USD': 'KKG000007HZ5',
'ETH-USD': 'BBG00J3NBWD7',
'BTC-USD': 'BBG006FCL7J4',
'SOL-USD': 'BBG013WVY457',
'UNI-USD': 'BBG013TZFVW3',
'SUSHI-USD': 'KKG0000010W1',
'AVAX-USD': 'KKG000007J36'
}
for k, v in expected_pairs.items():
actual = figi_provider.search(k)
print(actual.as_dict())
assert (
isinstance(actual, AssetInfo)
and actual.FIGI == v
and actual.Ticker == k
)
# %% Get assets for searching figi
pair_names = [x[:-4] for x in os.listdir("../data")]
def insert_dash(text: str, position: int) -> str:
if '-' not in text:
return text[:position] + '-' + text[position:]
else:
return text
usd_pairs = [
insert_dash(s.upper(), 3)
for s in pair_names if "usd" in s
]
print(usd_pairs[1:10])
# %%
figi_provider = OpenFigiProvider()
pair_figi_list = [figi_provider.search(p) for p in usd_pairs]
# %% ----
conn_settings = ConnectionSettings(server='****.database.windows.net', database='market-data-db', username='<user>', password='****')
db_conn = AzureDbConnection(conn_settings)
db_conn.connect()
for t in db_conn.get_tables():
print(t)
# %%
db_mapping = {
'Figi': types.CHAR(length=12),
'Ticker': types.VARCHAR(length=12)
}
figi_df = pd.DataFrame([t.as_dict() for t in pair_figi_list if isinstance(t, AssetInfo)])
db_conn.insert(figi_df, 'figi', db_mapping)
# %%
db_conn.dispose()
print('Completed')