Remove redundant

2024-11-21 18:02:23 +00:00 · 2022-10-22 09:25:59 +00:00 · 2022-10-22 09:25:59 +00:00 · 37ec6bef93
commit 37ec6bef93
parent 5c6ea177dd
2 changed files with 0 additions and 216 deletions
--- a/src/azure.py
+++ b/src/azure.py
@ -1,66 +0,0 @@
 # %% Import dependencies ----
 from dataclasses import dataclass
 from typing import Dict, Any
 from sqlalchemy import create_engine, inspect
 import pandas as pd
 import urllib
 # %% Models
@dataclass(frozen=True)
 class ConnectionSettings:
    """Connection Settings"""
    server: str
    database: str
    username: str
    password: str
    driver: str = '{ODBC Driver 18 for SQL Server}'
    timeout: int = 30
 # %% Connection
 class AzureDbConnection:
    """Azure SQL database connection."""
    def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None:
        conn_params = urllib.parse.quote_plus(
            'Driver=%s;' % conn_settings.driver +
            'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server +
            'Database=%s;' % conn_settings.database +
            'Uid=%s;' % conn_settings.username +
            'Pwd=%s;' % conn_settings.password +
            'Encrypt=yes;' +
            'TrustServerCertificate=no;' +
            'Connection Timeout=%s;' % conn_settings.timeout
        )
        conn_string = f'mssql+pyodbc:///?odbc_connect={conn_params}'
        self._db = create_engine(conn_string, echo=echo)
    def connect(self) -> None:
        """Estimate connection"""
        self._conn = self._db.connect()
    def get_tables(self) -> list[str]:
        """Get list of tables"""
        inspector = inspect(self._db)
        return [t for t in inspector.get_table_names()]
    def insert(self, inserted_data: pd.DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None:
        inserted_data.to_sql(
            con=self._db,
            schema='dbo',
            name=target_table,
            if_exists='append',  # or replace
            index=False,
            chunksize=chunksize,
            dtype=db_mapping
        )
    def dispose(self) -> None:
        """Dispose opened connections"""
        self._conn.close()
        self._db.dispose()
--- a/src/bitfinex_crypto_parser.py
+++ b/src/bitfinex_crypto_parser.py
@ -1,150 +0,0 @@
 #!/usr/bin/python3
 """
 Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution
 """
 # %%
 import os
 import numpy as np
 import pandas as pd
 from sqlalchemy import types
 from azure import AzureDbConnection, ConnectionSettings
 # %%
 # In terminal:
 #> kaggle -v # must be >1.15
 #> mkdir data; cd data
 #> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution
 #> unzip 392-crypto-currency-pairs-at-minute-resolution.zip
 input_dir: str = "../data"
 # Get names and number of available currency pairs
 pair_names = [x[:-4] for x in os.listdir(input_dir)]
 usd_pairs = [s for s in pair_names if "usd" in s]
 # Print the first 50 currency pair names
 print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:")
 print(usd_pairs[0:10])
 # %%
 def load_data(symbol: str, input_dir: str) -> pd.DataFrame:
    path_name = input_dir + "/" + symbol + ".csv"
    # Load data
    df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64})
    df.index = pd.to_datetime(df.index, unit='ms')
    df = df[~df.index.duplicated(keep='first')]
    # As mentioned in the description, bins without any change are not recorded.
    # We have to fill these gaps by filling them with the last value until a change occurs.
    #df = df.resample('1T').pad()
    return df[['open', 'high', 'low', 'close', 'volume']]
 def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame:
    df['hour'] = df.index.to_period('H')
    return (
        df
            .groupby(['hour'])
            .agg(
                {
                    'open': 'first',
                    'high': max,
                    'low': min,
                    'close': 'last',
                    'volume': sum,
                    #'time': max
                }
            )
            .reset_index()
        )
 # %% ----
 ethusd_1m = load_data("ethusd", input_dir)
 ethusd_1h = calc_ohlcv_1h(ethusd_1m)
 ethusd_1h.tail()
 # %% ----
 conn_settings = ConnectionSettings(
    '<server_name>',
    '<db_name>',
    '<user_name>',
    '****'
 )
 db_conn = AzureDbConnection(conn_settings)
 db_conn.connect()
 for t in db_conn.get_tables():
    print(t)
 # %%
 db_mapping = {
    'FIGI': types.CHAR(length=12),
    'open': types.DECIMAL(precision=19, scale=9),
    'high': types.DECIMAL(precision=19, scale=9),
    'close': types.DECIMAL(precision=19, scale=9),
    'low': types.DECIMAL(precision=19, scale=9),
    'volume': types.DECIMAL(precision=19, scale=9),
    'time': types.DATETIME(),
    'source_id': types.SMALLINT(),
    'version': types.VARCHAR(length=12),
    'interval': types.CHAR(length=2)
 }
 # %%
 pd.options.mode.chained_assignment = None 
 min_candels_n = 10000
 i = 1
 for pair in usd_pairs:
    print(f'INFO | {pair} > Starting read dataset...')
    candles_df = load_data(pair, input_dir)
    if len(candles_df) > min_candels_n:
        df = candles_df.loc[:'2022-10-01']
        if len(df) > 0:
            df = calc_ohlcv_1h(df)
            df['FIGI'] = pair
            df['time'] = df.hour.apply(lambda h: h.to_timestamp())
            df['source_id'] = 1
            df['version'] = 'v20221001'
            df['interval'] = '1H'
            df.drop(columns='hour', inplace=True)
            print(f'INFO | {pair} > Starting insert to DB ({i} of {len(usd_pairs)})...')
            print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time'])))
            try:
                db_conn.insert(df, 'crypto_1h', db_mapping)
            except Exception as ex:
                print(f'ERROR | {pair} > {ex}')
        else:
            print(f'WARN | {pair} > No new records')
    else:
        print(f'WARN | {pair} > Only {candles_df.shape[0]} records')
    i += 1
 # %%
 db_conn.dispose()