From 37ec6bef9388b3e6c60783cc964cc4de00a4fed4 Mon Sep 17 00:00:00 2001
From: codez0mb1e <d.petukhov@outlook.com>
Date: Sat, 22 Oct 2022 09:25:59 +0000
Subject: [PATCH] Remove redundant azure.py and bitfinex_crypto_parser.py

---
 src/azure.py                  |  66 ---------------
 src/bitfinex_crypto_parser.py | 150 ----------------------------------
 2 files changed, 216 deletions(-)
 delete mode 100644 src/azure.py
 delete mode 100644 src/bitfinex_crypto_parser.py

diff --git a/src/azure.py b/src/azure.py
deleted file mode 100644
index cdfcf77..0000000
--- a/src/azure.py
+++ /dev/null
@@ -1,66 +0,0 @@
-
-# %% Import dependencies ----
-from dataclasses import dataclass
-from typing import Dict, Any
-
-from sqlalchemy import create_engine, inspect
-
-import pandas as pd
-import urllib
-
-
-# %% Models
-@dataclass(frozen=True)
-class ConnectionSettings:
-    """Connection Settings"""
-    server: str
-    database: str
-    username: str
-    password: str
-    driver: str = '{ODBC Driver 18 for SQL Server}'
-    timeout: int = 30
-
-
-# %% Connection
-class AzureDbConnection:
-    """Azure SQL database connection."""
-
-    def __init__(self, conn_settings: ConnectionSettings, echo: bool = False) -> None:
-        conn_params = urllib.parse.quote_plus(
-            'Driver=%s;' % conn_settings.driver +
-            'Server=tcp:%s.database.windows.net,1433;' % conn_settings.server +
-            'Database=%s;' % conn_settings.database +
-            'Uid=%s;' % conn_settings.username +
-            'Pwd=%s;' % conn_settings.password +
-            'Encrypt=yes;' +
-            'TrustServerCertificate=no;' +
-            'Connection Timeout=%s;' % conn_settings.timeout
-        )
-        conn_string = f'mssql+pyodbc:///?odbc_connect={conn_params}'
-
-        self._db = create_engine(conn_string, echo=echo)
-
-    def connect(self) -> None:
-        """Estimate connection"""
-        self._conn = self._db.connect()
-
-    def get_tables(self) -> list[str]:
-        """Get list of tables"""
-        inspector = inspect(self._db)
-        return [t for t in inspector.get_table_names()]
-
-    def insert(self, inserted_data: pd.DataFrame, target_table: str, db_mapping: Dict[str, Any], chunksize: int = 10000) -> None:
-        inserted_data.to_sql(
-            con=self._db,
-            schema='dbo',
-            name=target_table,
-            if_exists='append',  # or replace
-            index=False,
-            chunksize=chunksize,
-            dtype=db_mapping
-        )
-
-    def dispose(self) -> None:
-        """Dispose opened connections"""
-        self._conn.close()
-        self._db.dispose()
diff --git a/src/bitfinex_crypto_parser.py b/src/bitfinex_crypto_parser.py
deleted file mode 100644
index 86c9f61..0000000
--- a/src/bitfinex_crypto_parser.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/usr/bin/python3
-
-"""
-Data source: https://www.kaggle.com/datasets/tencars/392-crypto-currency-pairs-at-minute-resolution
-"""
-
-# %%
-import os
-
-import numpy as np
-import pandas as pd
-from sqlalchemy import types
-
-from azure import AzureDbConnection, ConnectionSettings
-
-# %%
-
-# In terminal:
-#> kaggle -v # must be >1.15
-#> mkdir data; cd data
-#> kaggle datasets download tencars/392-crypto-currency-pairs-at-minute-resolution
-#> unzip 392-crypto-currency-pairs-at-minute-resolution.zip
-
-input_dir: str = "../data"
-
-# Get names and number of available currency pairs
-pair_names = [x[:-4] for x in os.listdir(input_dir)]
-usd_pairs = [s for s in pair_names if "usd" in s]
-
-# Print the first 50 currency pair names
-print(f"These are the first 10 out of {len(usd_pairs)} currency pairs in the dataset:")
-print(usd_pairs[0:10])
-
-
-# %%
-
-def load_data(symbol: str, input_dir: str) -> pd.DataFrame:
-    path_name = input_dir + "/" + symbol + ".csv"
-
-    # Load data
-    df = pd.read_csv(path_name, index_col='time', dtype={'open': np.float64, 'high': np.float64, 'low': np.float64, 'close': np.float64, 'volume': np.float64})
-    df.index = pd.to_datetime(df.index, unit='ms')
-    df = df[~df.index.duplicated(keep='first')]
-
-    # As mentioned in the description, bins without any change are not recorded.
-    # We have to fill these gaps by filling them with the last value until a change occurs.
-    #df = df.resample('1T').pad()
-
-    return df[['open', 'high', 'low', 'close', 'volume']]
-
-
-def calc_ohlcv_1h(df: pd.DataFrame) -> pd.DataFrame:
-    df['hour'] = df.index.to_period('H')
-    
-    return (
-        df
-            .groupby(['hour'])
-            .agg(
-                {
-                    'open': 'first',
-                    'high': max,
-                    'low': min,
-                    'close': 'last',
-                    'volume': sum,
-                    #'time': max
-                }
-            )
-            .reset_index()
-        )
-
-
-# %% ----
-ethusd_1m = load_data("ethusd", input_dir)
-ethusd_1h = calc_ohlcv_1h(ethusd_1m)
-
-ethusd_1h.tail()
-
-
-
-# %% ----
-conn_settings = ConnectionSettings(
-    '<server_name>',
-    '<db_name>',
-    '<user_name>',
-    '****'
-)
-
-db_conn = AzureDbConnection(conn_settings)
-db_conn.connect()
-
-for t in db_conn.get_tables():
-    print(t)
-
-
-# %%
-db_mapping = {
-    'FIGI': types.CHAR(length=12),
-    'open': types.DECIMAL(precision=19, scale=9),
-    'high': types.DECIMAL(precision=19, scale=9),
-    'close': types.DECIMAL(precision=19, scale=9),
-    'low': types.DECIMAL(precision=19, scale=9),
-    'volume': types.DECIMAL(precision=19, scale=9),
-    'time': types.DATETIME(),
-    'source_id': types.SMALLINT(),
-    'version': types.VARCHAR(length=12),
-    'interval': types.CHAR(length=2)
-}
-
-
-# %%
-pd.options.mode.chained_assignment = None 
-
-min_candels_n = 10000
-
-i = 1
-for pair in usd_pairs:
-    print(f'INFO | {pair} > Starting read dataset...')
-
-    candles_df = load_data(pair, input_dir)
-
-    if len(candles_df) > min_candels_n:
-
-        df = candles_df.loc[:'2022-10-01']
-
-        if len(df) > 0:
-            df = calc_ohlcv_1h(df)
-
-            df['FIGI'] = pair
-            df['time'] = df.hour.apply(lambda h: h.to_timestamp())
-            df['source_id'] = 1
-            df['version'] = 'v20221001'
-            df['interval'] = '1H'
-            df.drop(columns='hour', inplace=True)
-
-            print(f'INFO | {pair} > Starting insert to DB ({i} of {len(usd_pairs)})...')
-            print('DEBUG | {} rows from {} to {}'.format(df.shape[0], min(df['time']), max(df['time'])))
-            try:
-                db_conn.insert(df, 'crypto_1h', db_mapping)
-            except Exception as ex:
-                print(f'ERROR | {pair} > {ex}')
-
-        else:
-            print(f'WARN | {pair} > No new records')
-    else:
-        print(f'WARN | {pair} > Only {candles_df.shape[0]} records')
-    i += 1
-
-
-# %%
-db_conn.dispose()