Processing exceptions

Update parser
Remove redundant
2024-10-22 19:41:02 +00:00 · 2022-10-17 22:44:14 +00:00 · 2022-10-17 17:02:23 +00:00 · 2022-10-17 16:08:16 +00:00 · 2022-10-17 12:40:22 +00:00
5 changed files with 118 additions and 232 deletions
--- a/src/azure.py
+++ b/src/azure.py
@ -17,7 +17,7 @@ class ConnectionSettings:
    database: str
    username: str
    password: str
-    driver: str = '{ODBC Driver 17 for SQL Server}'
+    driver: str = '{ODBC Driver 18 for SQL Server}'
    timeout: int = 30


@ -28,10 +28,10 @@ class AzureDbConnection:
    def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None:
        conn_params = urllib.parse.quote_plus(
            'Driver=%s;' % conn_settings.driver +
-            'Server=tcp:%s,1433;' % conn_settings.server +
+            'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server +
            'Database=%s;' % conn_settings.database +
            'Uid=%s;' % conn_settings.username +
-            'Pwd={%s};' % conn_settings.password +
+            'Pwd=%s;' % conn_settings.password +
            'Encrypt=yes;' +
            'TrustServerCertificate=no;' +
            'Connection Timeout=%s;' % conn_settings.timeout
--- a/src/binance_api_client.py
+++ b/src/binance_api_client.py
@ -1,29 +0,0 @@
-# %%
-import numpy as np
-import pandas as pd 
-import time
-
-from binance.client import Client
-
-
-# %%
-api_key = "****"
-secret_key = "***"
-
-client = Client(api_key, secret_key)
-
-
-# %%
-coins_response = client.get_all_coins_info()
-coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')
-
-
-# %%
-pairs_list = coins_df.coin.apply(lambda x: f"{x}USDT") 
-client.get_historical_klines(
-    'BTCUSDT', 
-    interval=Client.KLINE_INTERVAL_1HOUR,
-    start_str='2022-04-21', 
-    end_str='2022-04-22'
-)
-
--- a/src/binance_open_data.ipynb
+++ b/src/binance_open_data.ipynb
@ -267,6 +267,44 @@
    "# Sort output by Close_time\n",
    "candles_1h_df.sort_values('Close_time')"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### (Optional) Use Binance API"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# %%\n",
+    "import pandas as pd \n",
+    "from binance.client import Client\n",
+    "\n",
+    "\n",
+    "# %%\n",
+    "api_key = \"****\"\n",
+    "secret_key = \"***\"\n",
+    "\n",
+    "client = Client(api_key, secret_key)\n",
+    "\n",
+    "\n",
+    "# %%\n",
+    "coins_response = client.get_all_coins_info()\n",
+    "coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')\n",
+    "\n",
+    "\n",
+    "# %%\n",
+    "pairs_list = coins_df.coin.apply(lambda x: f\"{x}USDT\") \n",
+    "client.get_historical_klines(\n",
+    "    'BTCUSDT', \n",
+    "    interval=Client.KLINE_INTERVAL_1HOUR,\n",
+    "    start_str='2022-04-21', \n",
+    "    end_str='2022-04-22'\n",
+    ")"
+   ]
  }
 ],
 "metadata": {
@ -285,7 +323,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.9.12"
  },
  "orig_nbformat": 4,
  "vscode": {
--- a/src/bitfinex_crypto_parser.py
+++ b/src/bitfinex_crypto_parser.py
@ -1,42 +1,41 @@
 #!/usr/bin/python3

 """
-Data source: https://www.kaggle.com/code/tencars/bitfinexdataset
+Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution
 """

 # %%
 import os
+
 import numpy as np
 import pandas as pd
 from sqlalchemy import types

 from azure import AzureDbConnection, ConnectionSettings

-
 # %%

-#> ~/apps/resistance/data
+# In terminal:
+#> kaggle -v # must be >1.15
+#> mkdir data; cd data
 #> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution
 #> unzip 392-crypto-currency-pairs-at-minute-resolution.zip

-input_path = "../data"
+input_dir = "../data"

 # Get names and number of available currency pairs
-pair_names = [x[:-4] for x in os.listdir(input_path)]
-n_pairs = len(pair_names)
+pair_names = [x[:-4] for x in os.listdir(input_dir)]
+usd_pairs = [s for s in pair_names if "usd" in s]

 # Print the first 50 currency pair names
-print("These are the first 50 out of {} currency pairs in the dataset:".format(n_pairs))
-print(pair_names[0:50])
-
-usd_pairs = [s for s in pair_names if "usd" in s]
-print(usd_pairs)
+print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:")
+print(usd_pairs[0:10])


 # %%

-def load_data(symbol, source=input_path):
-    path_name = source + "/" + symbol + ".csv"
+def load_data(symbol: str, input_dir: str) -> pd.DataFrame:
+    path_name = input_dir + "/" + symbol + ".csv"

    # Load data
    df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64})
@ -50,23 +49,50 @@ def load_data(symbol, source=input_path):
    return df[['open', 'high', 'low', 'close', 'volume']]


+def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame:
+    df['hour'] = df.index.to_period('H')
+    
+    return (
+        df
+            .groupby(['hour'])
+            .agg(
+                {
+                    'open': 'first',
+                    'high': max,
+                    'low': min,
+                    'close': 'last',
+                    'volume': sum,
+                    #'time': max
+                }
+            )
+            .reset_index()
+        )
+
+
 # %% ----
-sample_df = load_data("ethusd")
-sample_df
+ethusd_1m = load_data("ethusd", input_dir)
+ethusd_1h = calc_ohlcv_1h(ethusd_1m)
+
+ethusd_1h.tail()



 # %% ----
+conn_settings = ConnectionSettings(
+    'datainstinct',
+    'market-data-db',
+    'demo',
+    '0test_test_AND_test'
+)
+
 db_conn = AzureDbConnection(conn_settings)
-
 db_conn.connect()
+
 for t in db_conn.get_tables():
    print(t)


 # %%
-min_candels_n = 10000
-
 db_mapping = {
    'FIGI': types.CHAR(length=12),
    'open': types.DECIMAL(precision=19, scale=9),
@ -75,28 +101,47 @@ db_mapping = {
    'low': types.DECIMAL(precision=19, scale=9),
    'volume': types.DECIMAL(precision=19, scale=9),
    'time': types.DATETIME(),
-    'source_id': types.SMALLINT,
+    'source_id': types.SMALLINT(),
    'version': types.VARCHAR(length=12),
    'interval': types.CHAR(length=2)
 }

+
+# %%
+pd.options.mode.chained_assignment = None 
+
+min_candels_n = 10000
+
 for pair in usd_pairs:
-    print(f'Starting read {pair}...')
-    candles_df = load_data(pair)
+    print(f'INFO | {pair} > Starting read dataset...')

-    candles_df['FIGI'] = pair
-    candles_df['time'] = candles_df.index
-    candles_df['source_id'] = 128
-    candles_df['version'] = 'v202206'
-    candles_df['interval'] = '1M'
+    candles_df = load_data(pair, input_dir)

-    if candles_df.shape[0] > min_candels_n:
-        print('{} rows from {} to {}'.format(candles_df.shape[0], min(candles_df['time']), max(candles_df['time'])))
+    if len(candles_df) > min_candels_n:
+
+        df = candles_df.loc['2022-07-01':'2022-10-01']
+
+        if len(df) > 0:
+            df = calc_ohlcv_1h(df)
+
+            df['FIGI'] = pair
+            df['time'] = df.hour.apply(lambda h: h.to_timestamp())
+            df['source_id'] = 1
+            df['version'] = 'v20221001'
+            df['interval'] = '1H'
+            df.drop(columns='hour', inplace=True)
+
+            print(f'INFO | {pair} > Starting insert to DB...')
+            print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time'])))
+            try:
+                db_conn.insert(df, 'crypto', db_mapping)
+            except Exception as ex:
+                print(f'ERROR | {pair} > {ex}')

-        print(f'Starting insert {pair}...')
-        db_conn.insert(candles_df, 'crypto', db_mapping)
        else:
-        print(f'WARN: {pair} has only {candles_df.shape[0]} records')
+            print(f'WARN | {pair} > No new records')
+    else:
+        print(f'WARN | {pair} > Only {candles_df.shape[0]} records')


 # %%
--- a/src/openfigi_crawler.py
+++ b/src/openfigi_crawler.py
@ -1,168 +0,0 @@
-
-# %% Import dependencies
-import os
-from dataclasses import dataclass
-from typing import Dict, Union
-
-import pandas as pd
-
-import httpx
-
-from sqlalchemy import types
-from azure import AzureDbConnection, ConnectionSettings
-
-
-# %% Data models
-@dataclass
-class AssetInfo:
-    FIGI: str
-    Ticker: str
-    Title: Union[str, None]
-    Description: Union[str, None]
-    AssetType: str = 'Cryptocurrency'
-    SourceId: str = 'OpenFigi API'
-    Version: str = 'v202206'
-
-    def as_dict(self) -> Dict[str, str]:
-        return {'Figi': self.FIGI, 'Ticker': self.Ticker}
-
-
-# %% FIGI provider
-class OpenFigiProvider:
-    """
-    OpenFigi API provider
-
-    References:
-            https://www.openfigi.com/assets/local/figi-allocation-rules.pdf
-            https://www.openfigi.com/search
-    """
-    @staticmethod
-    def _send_request(ticker: str, asset_type: str) -> pd.DataFrame:
-        api_url = f'https://www.openfigi.com/search/query?facetQuery=MARKET_SECTOR_DES:%22{asset_type}%22&num_rows=100&simpleSearchString={ticker}&start=0'
-        response = httpx.get(api_url)
-
-        json_response = response.json()
-        return pd.DataFrame.from_dict(json_response['result'], orient='columns')
-
-
-    @staticmethod
-    def _find_figi(df: pd.DataFrame, field_name: str) -> Union[str, None]:
-        if len(df) == 0 or field_name not in df.columns:
-            return None
-
-        result = df[field_name].dropna().unique()
-        
-        if (len(result) != 1):
-            print(f'[WARN] Multiple ({len(result)}) FIGI records was found')
-            return None
-
-        return result[0]
-
-
-    @staticmethod
-    def _find_name(df: pd.DataFrame) -> Union[str, None]:
-        if len(df) == 0 or 'DS002_sd' not in df.columns:
-            return None
-            
-         result = df['DS002_sd'].dropna().unique()
-        
-        if (len(result) != 1):
-            print(f'[WARN] Multiple ({len(result)}) name records was found')
-            return None
-
-        return result[0]
-
-
-    def search(self, ticker: str, asset_type: str = 'Curncy') -> Union[AssetInfo, None]:
-        """Return FIGI for pair"""
-        
-        response_df = OpenFigiProvider._send_request(ticker, asset_type)
-
-        figi = OpenFigiProvider._find_figi(response_df, 'kkg_pairFIGI_sd')
-
-        if figi is None:
-            base_quote = ticker.split('-')[0]
-            print(f'[INFO] {ticker} > Try to search using base quote {base_quote}')
-
-            response_df = OpenFigiProvider._send_request(base_quote, asset_type)
-            figi = OpenFigiProvider._find_figi(response_df, 'kkg_baseAssetFigi_sd')
-
-            if figi is None:
-                return None
-
-        return AssetInfo(figi, ticker, None, None)
-
-
-#%%
-figi_provider = OpenFigiProvider()
-
-assert figi_provider.search('WAX-USD') == None
-assert figi_provider.search('ABCD') == None
-
-
-# %% Tests
-expected_pairs = {
-    'BNB-USD': 'KKG000007HZ5',
-    'ETH-USD': 'BBG00J3NBWD7',
-    'BTC-USD': 'BBG006FCL7J4',
-    'SOL-USD': 'BBG013WVY457',
-    'UNI-USD': 'BBG013TZFVW3',
-    'SUSHI-USD': 'KKG0000010W1',
-    'AVAX-USD': 'KKG000007J36'
-}
-
-
-for k, v in expected_pairs.items():
-    actual = figi_provider.search(k)
-    print(actual.as_dict())
-    assert (
-        isinstance(actual, AssetInfo)
-        and actual.FIGI == v
-        and actual.Ticker == k
-    )
-
-
-# %% Get assets for searching figi
-pair_names = [x[:-4] for x in os.listdir("../data")]
-
-def insert_dash(text: str, position: int) -> str:
-    if '-' not in text:
-        return text[:position] + '-' + text[position:]
-    else:
-        return text
-
-usd_pairs = [
-    insert_dash(s.upper(), 3)
-    for s in pair_names if "usd" in s
-]
-
-print(usd_pairs[1:10])
-
-
-# %%
-figi_provider = OpenFigiProvider()
-pair_figi_list = [figi_provider.search(p) for p in usd_pairs]
-
-
-# %% ----
-conn_settings = ConnectionSettings(server='****.database.windows.net', database='market-data-db', username='<user>', password='****')
-db_conn = AzureDbConnection(conn_settings)
-
-db_conn.connect()
-for t in db_conn.get_tables():
-    print(t)
-
-
-# %%
-db_mapping = {
-    'Figi': types.CHAR(length=12),
-    'Ticker': types.VARCHAR(length=12)
-}
-
-figi_df = pd.DataFrame([t.as_dict() for t in pair_figi_list if isinstance(t, AssetInfo)])
-db_conn.insert(figi_df, 'figi', db_mapping)
-
-
-# %%
-db_conn.dispose()
-print('Completed')
Author	SHA1	Message	Date
codez0mb1e	28450cdeab	Processing exceptions	2022-10-17 22:44:14 +00:00
codez0mb1e	3882efc9a2	Update parser	2022-10-17 17:02:23 +00:00
codez0mb1e	aa7d45380b	Remove redundant	2022-10-17 16:08:16 +00:00
codez0mb1e	138587cd6b	Update SQL driver and minor improvements	2022-10-17 12:40:22 +00:00