diff --git a/src/binance_api_client.py b/src/binance_api_client.py new file mode 100644 index 0000000..5863e35 --- /dev/null +++ b/src/binance_api_client.py @@ -0,0 +1,29 @@ +# %% +import numpy as np +import pandas as pd +import time + +from binance.client import Client + + +# %% +api_key = "****" +secret_key = "***" + +client = Client(api_key, secret_key) + + +# %% +coins_response = client.get_all_coins_info() +coins_df = pd.DataFrame.from_dict(coins_response, orient='columns') + + +# %% +pairs_list = coins_df.coin.apply(lambda x: f"{x}USDT") +client.get_historical_klines( + 'BTCUSDT', + interval=Client.KLINE_INTERVAL_1HOUR, + start_str='2022-04-21', + end_str='2022-04-22' +) + diff --git a/src/binance_open_data.ipynb b/src/binance_open_data.ipynb new file mode 100644 index 0000000..dfe4396 --- /dev/null +++ b/src/binance_open_data.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Binance Open Data lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We collect data from [Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n", + "\n", + "### Step 1. 
Download data \n", + "\n", + "Downloading __1-minute candles__ for `BTC/USDT` and `BTC/USDC` using `bash` or `powershell` scripts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "#!/bin/sh\n", + "\n", + "# create dir for data\n", + "!mkdir ../data\n", + "\n", + "# download data using GET request\n", + "!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", + "!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", + "\n", + "# unzip\n", + "!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n", + "!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Import data to Dataframe \n", + "\n", + "Import packages for data analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import httpx\n", + "\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import data from CSV file to Pandas DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_data(pair: str) -> pd.DataFrame:\n", + " return pd.read_csv(f'../data/{pair}-1m-2022-06-21.csv', header = None)\n", + "\n", + "btcusdt_df = get_data('BTCUSDT')\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set names to columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n", + " column_names_mapping = {\n", + " 0: 'Open_time',\n", + " 1: 'Open',\n", + " 2: 'High',\n", + " 3: 
'Low',\n", + " 4: 'Close',\n", + " 5: 'Volume',\n", + " 6: 'Close_time',\n", + " 7: 'Quote_asset_volume',\n", + " 8: 'Number_of_trades',\n", + " 9: 'Taker_buy_base_asset_volume',\n", + " 10: 'Taker_buy_quote_asset_volume',\n", + " 11: 'Ignore'\n", + " }\n", + " return df.rename(columns=column_names_mapping)\n", + "\n", + "btcusdt_df = set_column_names(btcusdt_df)\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert timestamp to human-readable date and time format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", + "\n", + "btcusdt_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_df.describe(datetime_is_numeric=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Transform data\n", + "\n", + "Calculate __1-hour OHLCV__ candles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n", + " df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n", + "\n", + " return (\n", + " df\n", + " .groupby(['hour'])\n", + " .agg(\n", + " {\n", + " 'Open': 'first',\n", + " 'High': max,\n", + " 'Low': min,\n", + " 'Close': 'last',\n", + " 'Volume': sum,\n", + " 'Close_time': max\n", + " }\n", + " )\n", + " .reset_index()\n", + " .drop(columns=['hour'])\n", + " )\n", + "\n", + "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", + "\n", + "btcusdt_1h_df" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Data validation is very important. Let's write domain-driven asserts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert(\n", + " isinstance(btcusdt_1h_df, pd.DataFrame)\n", + " and btcusdt_1h_df.shape == (24, 6)\n", + " and not btcusdt_1h_df.isnull().any().any()\n", + " and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Expand the dataset with information about `BTC/USDC` \n", + "\n", + "Load the `BTC/USDC` 1-minute candles and transform them to 1-hour candles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdc_df = get_data('BTCUSDC') # download data\n", + "btcusdc_df = set_column_names(btcusdc_df) # set column names\n", + "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n", + "\n", + "btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n", + "btcusdc_1h_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Join everything together:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", + "btcusdc_1h_df['pair'] = 'BTC-USDC'\n", + "\n", + "# Join datasets\n", + "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", + "\n", + "# Validate result\n", + "assert(\n", + " isinstance(candles_1h_df, pd.DataFrame)\n", + " and candles_1h_df.shape == (48, 7)\n", + " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", + ")\n", + "\n", + "# Sort output by Close_time\n", + "candles_1h_df.sort_values('Close_time')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('base')", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/candidate_tests.ipynb b/src/candidate_tests.ipynb deleted file mode 100644 index 4b9e15e..0000000 --- a/src/candidate_tests.ipynb +++ /dev/null @@ -1,2176 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Analyst Code Interview\n", - "\n", - "Hi,\n", - "\n", - "It's a simple code interview. I shouldn't take you more than an hour, and it's fun :)\n", - "\n", - "## Part I. Quizzes\n", - "\n", - "If you didn't have `Python badge` on LinkedIn then it's time [to do it](https://www.linkedin.com/skill-assessments/hub/quizzes/)! Please share your results with us. By the way, here's my badge.\n", - "\n", - "![](../docs/li.png)\n", - "\n", - "If you don't have a LinkedIn account or like quizzes, then take another test from [W3 School](https://www.w3schools.com/quiztest/quiztest.asp?qtest=PANDAS).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Part II. Coding\n", - "\n", - "Please share the screen with us and let's repair our program together. \n", - "You can use google.com, StackOverflow, or your favorite IDE. Please use a version of Python greater or equals than 3.8\n", - "\n", - "We intend to collect data from [Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n", - "\n", - "### Stet 1. 
Download data \n", - "\n", - "Downloading __1-minute candles__ for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell` scripts:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mkdir: cannot create directory ‘../data’: File exists\n", - "--2022-06-24 11:22:56-- https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 108.138.94.79, 108.138.94.48, 108.138.94.67, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|108.138.94.79|:443... connected.\n", - "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/BTCUSDT-1m-2022-06-21.zip’ not modified on server. Omitting download.\n", - "\n", - "--2022-06-24 11:22:57-- https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", - "Resolving data.binance.vision (data.binance.vision)... 108.138.94.88, 108.138.94.67, 108.138.94.48, ...\n", - "Connecting to data.binance.vision (data.binance.vision)|108.138.94.88|:443... connected.\n", - "HTTP request sent, awaiting response... 304 Not Modified\n", - "File ‘../data/BTCUSDC-1m-2022-06-21.zip’ not modified on server. 
Omitting download.\n", - "\n", - "Archive: ../data/BTCUSDT-1m-2022-06-21.zip\n", - " inflating: ../data/BTCUSDT-1m-2022-06-21.csv \n", - "Archive: ../data/BTCUSDC-1m-2022-06-21.zip\n", - " inflating: ../data/BTCUSDC-1m-2022-06-21.csv \n" - ] - } - ], - "source": [ - "#!/bin/sh\n", - "\n", - "# create dir for data\n", - "!mkdir ../data\n", - "\n", - "# download data using GET request\n", - "!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n", - "!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n", - "\n", - "# unzip\n", - "!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n", - "!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Import data to Dataframe \n", - "\n", - "Import packages for data analysis:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "import httpx\n", - "\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import data from CSV file to Pandas DataFrame:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01234567891011
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 \\\n", - "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " 6 7 8 9 10 11 \n", - "0 1655769659999 1.455321e+06 1150 37.36821 7.683845e+05 0 \n", - "1 1655769719999 2.430514e+06 1402 61.25760 1.260950e+06 0 \n", - "2 1655769779999 2.686026e+06 1433 55.80573 1.149409e+06 0 \n", - "3 1655769839999 2.128819e+06 1301 64.57346 1.327338e+06 0 \n", - "4 1655769899999 1.717907e+06 1098 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def get_data(pair: str) -> pd.DataFrame:\n", - " return pd.read_csv(f'../data/binance/{pair}-1m-2022-06-21.csv', header = None)\n", - "\n", - "btcusdt_df = get_data('BTCUSDT')\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set names to columns:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
0165576960000020573.9020590.0020552.1720558.3670.7692516557696599991.455321e+06115037.368217.683845e+050
1165576966000020558.3520611.2120558.3520606.70118.0603216557697199992.430514e+06140261.257601.260950e+060
2165576972000020606.6920626.8920552.4020552.40130.4289416557697799992.686026e+06143355.805731.149409e+060
3165576978000020552.4120585.6920539.0920578.89103.5631816557698399992.128819e+06130164.573461.327338e+060
4165576984000020578.8920579.9020537.5720554.4683.5550916557698999991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low Close Volume \\\n", - "0 1655769600000 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 1655769660000 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 1655769720000 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 1655769780000 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 1655769840000 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " Close_time Quote_asset_volume Number_of_trades \\\n", - "0 1655769659999 1.455321e+06 1150 \n", - "1 1655769719999 2.430514e+06 1402 \n", - "2 1655769779999 2.686026e+06 1433 \n", - "3 1655769839999 2.128819e+06 1301 \n", - "4 1655769899999 1.717907e+06 1098 \n", - "\n", - " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", - "0 37.36821 7.683845e+05 0 \n", - "1 61.25760 1.260950e+06 0 \n", - "2 55.80573 1.149409e+06 0 \n", - "3 64.57346 1.327338e+06 0 \n", - "4 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n", - " column_names_mapping = {\n", - " 0: 'Open_time',\n", - " 1: 'Open',\n", - " 2: 'High',\n", - " 3: 'Low',\n", - " 4: 'Close',\n", - " 5: 'Volume',\n", - " 6: 'Close_time',\n", - " 7: 'Quote_asset_volume',\n", - " 8: 'Number_of_trades',\n", - " 9: 'Taker_buy_base_asset_volume',\n", - " 10: 'Taker_buy_quote_asset_volume',\n", - " 11: 'Ignore'\n", - " }\n", - " return df.rename(columns=column_names_mapping)\n", - "\n", - "btcusdt_df = set_column_names(btcusdt_df)\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert timestamp to human-readable date and time format:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
02022-06-21 00:00:0020573.9020590.0020552.1720558.3670.769252022-06-21 00:00:59.9991.455321e+06115037.368217.683845e+050
12022-06-21 00:01:0020558.3520611.2120558.3520606.70118.060322022-06-21 00:01:59.9992.430514e+06140261.257601.260950e+060
22022-06-21 00:02:0020606.6920626.8920552.4020552.40130.428942022-06-21 00:02:59.9992.686026e+06143355.805731.149409e+060
32022-06-21 00:03:0020552.4120585.6920539.0920578.89103.563182022-06-21 00:03:59.9992.128819e+06130164.573461.327338e+060
42022-06-21 00:04:0020578.8920579.9020537.5720554.4683.555092022-06-21 00:04:59.9991.717907e+06109836.409447.485065e+050
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low Close Volume \\\n", - "0 2022-06-21 00:00:00 20573.90 20590.00 20552.17 20558.36 70.76925 \n", - "1 2022-06-21 00:01:00 20558.35 20611.21 20558.35 20606.70 118.06032 \n", - "2 2022-06-21 00:02:00 20606.69 20626.89 20552.40 20552.40 130.42894 \n", - "3 2022-06-21 00:03:00 20552.41 20585.69 20539.09 20578.89 103.56318 \n", - "4 2022-06-21 00:04:00 20578.89 20579.90 20537.57 20554.46 83.55509 \n", - "\n", - " Close_time Quote_asset_volume Number_of_trades \\\n", - "0 2022-06-21 00:00:59.999 1.455321e+06 1150 \n", - "1 2022-06-21 00:01:59.999 2.430514e+06 1402 \n", - "2 2022-06-21 00:02:59.999 2.686026e+06 1433 \n", - "3 2022-06-21 00:03:59.999 2.128819e+06 1301 \n", - "4 2022-06-21 00:04:59.999 1.717907e+06 1098 \n", - "\n", - " Taker_buy_base_asset_volume Taker_buy_quote_asset_volume Ignore \n", - "0 37.36821 7.683845e+05 0 \n", - "1 61.25760 1.260950e+06 0 \n", - "2 55.80573 1.149409e+06 0 \n", - "3 64.57346 1.327338e+06 0 \n", - "4 36.40944 7.485065e+05 0 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n", - "btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n", - "\n", - "btcusdt_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Open_timeOpenHighLowCloseVolumeClose_timeQuote_asset_volumeNumber_of_tradesTaker_buy_base_asset_volumeTaker_buy_quote_asset_volumeIgnore
count14401440.0000001440.0000001440.0000001440.0000001440.00000014401.440000e+031440.0000001440.0000001.440000e+031440.0
mean2022-06-21 11:59:3021014.35546521033.87731920996.87103521014.45808372.4794352022-06-21 12:00:29.9990003201.526793e+061052.26458336.2452087.635009e+050.0
min2022-06-21 00:00:0020377.84000020396.83000020348.40000020377.8500005.3914202022-06-21 00:00:59.9990001.128460e+05227.0000001.9779604.127662e+040.0
25%2022-06-21 05:59:4520737.96500020763.56750020713.95500020737.97000034.1392272022-06-21 06:00:44.9990000647.153174e+05636.00000015.9056503.315046e+050.0
50%2022-06-21 11:59:3021073.58500021095.07500021052.21000021073.59000052.6157302022-06-21 12:00:29.9990000641.100389e+06859.00000024.9484955.249121e+050.0
75%2022-06-21 17:59:1521241.47000021260.69000021224.11500021241.46750082.0582072022-06-21 18:00:14.9990000641.718243e+061225.25000039.8596558.356794e+050.0
max2022-06-21 23:59:0021691.55000021723.00000021631.91000021691.550000732.1814002022-06-21 23:59:59.9990001.571181e+078776.000000471.9332101.013001e+070.0
stdNaN325.066922325.563767324.575912324.95290871.230479NaN1.511238e+06736.29048041.5594218.819089e+050.0
\n", - "
" - ], - "text/plain": [ - " Open_time Open High Low \\\n", - "count 1440 1440.000000 1440.000000 1440.000000 \n", - "mean 2022-06-21 11:59:30 21014.355465 21033.877319 20996.871035 \n", - "min 2022-06-21 00:00:00 20377.840000 20396.830000 20348.400000 \n", - "25% 2022-06-21 05:59:45 20737.965000 20763.567500 20713.955000 \n", - "50% 2022-06-21 11:59:30 21073.585000 21095.075000 21052.210000 \n", - "75% 2022-06-21 17:59:15 21241.470000 21260.690000 21224.115000 \n", - "max 2022-06-21 23:59:00 21691.550000 21723.000000 21631.910000 \n", - "std NaN 325.066922 325.563767 324.575912 \n", - "\n", - " Close Volume Close_time \\\n", - "count 1440.000000 1440.000000 1440 \n", - "mean 21014.458083 72.479435 2022-06-21 12:00:29.999000320 \n", - "min 20377.850000 5.391420 2022-06-21 00:00:59.999000 \n", - "25% 20737.970000 34.139227 2022-06-21 06:00:44.999000064 \n", - "50% 21073.590000 52.615730 2022-06-21 12:00:29.999000064 \n", - "75% 21241.467500 82.058207 2022-06-21 18:00:14.999000064 \n", - "max 21691.550000 732.181400 2022-06-21 23:59:59.999000 \n", - "std 324.952908 71.230479 NaN \n", - "\n", - " Quote_asset_volume Number_of_trades Taker_buy_base_asset_volume \\\n", - "count 1.440000e+03 1440.000000 1440.000000 \n", - "mean 1.526793e+06 1052.264583 36.245208 \n", - "min 1.128460e+05 227.000000 1.977960 \n", - "25% 7.153174e+05 636.000000 15.905650 \n", - "50% 1.100389e+06 859.000000 24.948495 \n", - "75% 1.718243e+06 1225.250000 39.859655 \n", - "max 1.571181e+07 8776.000000 471.933210 \n", - "std 1.511238e+06 736.290480 41.559421 \n", - "\n", - " Taker_buy_quote_asset_volume Ignore \n", - "count 1.440000e+03 1440.0 \n", - "mean 7.635009e+05 0.0 \n", - "min 4.127662e+04 0.0 \n", - "25% 3.315046e+05 0.0 \n", - "50% 5.249121e+05 0.0 \n", - "75% 8.356794e+05 0.0 \n", - "max 1.013001e+07 0.0 \n", - "std 8.819089e+05 0.0 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"btcusdt_df.describe(datetime_is_numeric=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Transform data\n", - "\n", - "Calculate __1-hour OHLCV__ candles:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_time
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume Close_time\n", - "0 20573.90 20705.74 20396.77 20672.30 4235.77392 2022-06-21 00:59:59.999\n", - "1 20672.30 20783.99 20504.89 20670.88 3892.91412 2022-06-21 01:59:59.999\n", - "2 20670.87 20699.93 20348.40 20433.49 2876.88928 2022-06-21 02:59:59.999\n", - "3 20433.49 20665.26 20365.00 20614.04 3306.77018 2022-06-21 03:59:59.999\n", - "4 20614.04 20740.72 20474.41 20656.17 2925.33542 2022-06-21 04:59:59.999\n", - "5 20656.17 21029.93 20621.14 20890.77 6516.65611 2022-06-21 05:59:59.999\n", - "6 20890.77 21202.00 20890.77 21192.08 6114.51662 2022-06-21 06:59:59.999\n", - "7 21192.08 21333.00 20952.46 21120.28 5433.08603 2022-06-21 07:59:59.999\n", - "8 21120.28 21500.01 21051.25 21356.32 6241.36801 2022-06-21 08:59:59.999\n", - "9 21356.32 21470.00 21166.94 21200.00 4961.57496 2022-06-21 09:59:59.999\n", - "10 21200.01 21307.83 21061.54 21299.27 3982.47578 2022-06-21 10:59:59.999\n", - "11 21299.02 21361.03 20911.13 20962.42 4685.42524 2022-06-21 11:59:59.999\n", - "12 20962.42 21163.24 20853.36 21106.20 5596.26303 2022-06-21 12:59:59.999\n", - "13 21106.19 21650.00 20995.64 21619.31 8667.59080 2022-06-21 13:59:59.999\n", - "14 21619.31 21723.00 21427.82 21590.79 6295.02429 2022-06-21 14:59:59.999\n", - "15 21590.79 21604.27 21339.07 21392.19 3809.54622 2022-06-21 15:59:59.999\n", - "16 21392.18 21550.39 21355.77 21401.52 2421.77629 2022-06-21 16:59:59.999\n", - "17 21401.52 21457.82 21195.70 21242.94 3755.82919 2022-06-21 17:59:59.999\n", - "18 21242.93 21256.99 21076.48 21100.01 2820.40675 2022-06-21 18:59:59.999\n", - "19 21100.00 21306.51 20870.01 20888.64 4015.33528 2022-06-21 19:59:59.999\n", - "20 20888.63 20987.38 20666.00 20859.86 4442.87596 2022-06-21 20:59:59.999\n", - "21 20859.86 21054.99 20808.00 20972.91 1813.56236 2022-06-21 21:59:59.999\n", - "22 20972.91 21003.70 20741.03 20897.00 2945.61650 2022-06-21 22:59:59.999\n", - "23 20897.00 20943.17 20551.00 20723.52 2613.77441 2022-06-21 
23:59:59.999" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n", - " df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n", - "\n", - " return (\n", - " df\n", - " .groupby(['hour'])\n", - " .agg(\n", - " {\n", - " 'Open': 'first',\n", - " 'High': max,\n", - " 'Low': min,\n", - " 'Close': 'last',\n", - " 'Volume': sum,\n", - " 'Close_time': max\n", - " }\n", - " )\n", - " .reset_index()\n", - " .drop(columns=['hour'])\n", - " )\n", - "\n", - "btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n", - "\n", - "btcusdt_1h_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data validation is very important. Let's write domain-driven asserts:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "assert(\n", - " isinstance(btcusdt_1h_df, pd.DataFrame)\n", - " and btcusdt_1h_df.shape == (24, 6)\n", - " and not btcusdt_1h_df.isnull().any().any()\n", - " and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 3: Expand the dataset with information about `BTC/USDC` \n", - "\n", - "Download `BTC/USDC` 1-minute candles and transform it to 1-hour candles:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_time
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume Close_time\n", - "0 20549.65 20703.08 20371.75 20647.35 284.73521 2022-06-21 00:59:59.999\n", - "1 20648.06 20771.07 20482.83 20646.88 192.50513 2022-06-21 01:59:59.999\n", - "2 20644.78 20672.98 20331.36 20406.71 195.67023 2022-06-21 02:59:59.999\n", - "3 20402.27 20649.99 20331.48 20585.98 290.39210 2022-06-21 03:59:59.999\n", - "4 20586.63 20721.10 20449.57 20632.33 205.78459 2022-06-21 04:59:59.999\n", - "5 20640.33 20999.00 20594.58 20866.07 412.42477 2022-06-21 05:59:59.999\n", - "6 20873.25 21178.00 20873.25 21169.64 524.25894 2022-06-21 06:59:59.999\n", - "7 21166.58 21300.00 20913.40 21097.44 304.65113 2022-06-21 07:59:59.999\n", - "8 21097.20 21471.91 21023.64 21327.56 366.29201 2022-06-21 08:59:59.999\n", - "9 21342.76 21448.22 21144.24 21182.12 350.77543 2022-06-21 09:59:59.999\n", - "10 21175.75 21298.52 21038.30 21270.98 526.66602 2022-06-21 10:59:59.999\n", - "11 21273.73 21336.94 20887.17 20948.96 579.00200 2022-06-21 11:59:59.999\n", - "12 20936.59 21143.99 20800.00 21079.42 452.07214 2022-06-21 12:59:59.999\n", - "13 21079.41 21629.36 20968.21 21592.69 507.91190 2022-06-21 13:59:59.999\n", - "14 21595.13 21699.98 21394.05 21572.00 445.73978 2022-06-21 14:59:59.999\n", - "15 21571.96 21671.34 21314.94 21370.87 435.99237 2022-06-21 15:59:59.999\n", - "16 21370.08 21533.14 21330.00 21377.52 243.10368 2022-06-21 16:59:59.999\n", - "17 21377.85 21434.58 21168.59 21220.14 291.46137 2022-06-21 17:59:59.999\n", - "18 21220.14 21233.94 21054.72 21074.20 423.05836 2022-06-21 18:59:59.999\n", - "19 21074.81 21279.52 20851.10 20866.39 266.46488 2022-06-21 19:59:59.999\n", - "20 20864.31 20960.98 20645.03 20838.52 330.79569 2022-06-21 20:59:59.999\n", - "21 20838.51 21057.01 20780.29 20958.22 99.09836 2022-06-21 21:59:59.999\n", - "22 20950.07 20975.92 20719.02 20875.37 177.08203 2022-06-21 22:59:59.999\n", - "23 20880.71 20916.85 20527.90 20699.78 173.22797 2022-06-21 23:59:59.999" - ] - }, - 
"execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdc_df = get_data('BTCUSDC') # download data\n", - "btcusdc_df = set_column_names(btcusdc_df) # set column names\n", - "btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n", - "\n", - "btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n", - "btcusdc_1h_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join altogether:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OpenHighLowCloseVolumeClose_timepair
020573.9020705.7420396.7720672.304235.773922022-06-21 00:59:59.999BTC-USDT
020549.6520703.0820371.7520647.35284.735212022-06-21 00:59:59.999BTC-USDC
120648.0620771.0720482.8320646.88192.505132022-06-21 01:59:59.999BTC-USDC
120672.3020783.9920504.8920670.883892.914122022-06-21 01:59:59.999BTC-USDT
220670.8720699.9320348.4020433.492876.889282022-06-21 02:59:59.999BTC-USDT
220644.7820672.9820331.3620406.71195.670232022-06-21 02:59:59.999BTC-USDC
320433.4920665.2620365.0020614.043306.770182022-06-21 03:59:59.999BTC-USDT
320402.2720649.9920331.4820585.98290.392102022-06-21 03:59:59.999BTC-USDC
420614.0420740.7220474.4120656.172925.335422022-06-21 04:59:59.999BTC-USDT
420586.6320721.1020449.5720632.33205.784592022-06-21 04:59:59.999BTC-USDC
520656.1721029.9320621.1420890.776516.656112022-06-21 05:59:59.999BTC-USDT
520640.3320999.0020594.5820866.07412.424772022-06-21 05:59:59.999BTC-USDC
620890.7721202.0020890.7721192.086114.516622022-06-21 06:59:59.999BTC-USDT
620873.2521178.0020873.2521169.64524.258942022-06-21 06:59:59.999BTC-USDC
721192.0821333.0020952.4621120.285433.086032022-06-21 07:59:59.999BTC-USDT
721166.5821300.0020913.4021097.44304.651132022-06-21 07:59:59.999BTC-USDC
821097.2021471.9121023.6421327.56366.292012022-06-21 08:59:59.999BTC-USDC
821120.2821500.0121051.2521356.326241.368012022-06-21 08:59:59.999BTC-USDT
921342.7621448.2221144.2421182.12350.775432022-06-21 09:59:59.999BTC-USDC
921356.3221470.0021166.9421200.004961.574962022-06-21 09:59:59.999BTC-USDT
1021175.7521298.5221038.3021270.98526.666022022-06-21 10:59:59.999BTC-USDC
1021200.0121307.8321061.5421299.273982.475782022-06-21 10:59:59.999BTC-USDT
1121273.7321336.9420887.1720948.96579.002002022-06-21 11:59:59.999BTC-USDC
1121299.0221361.0320911.1320962.424685.425242022-06-21 11:59:59.999BTC-USDT
1220936.5921143.9920800.0021079.42452.072142022-06-21 12:59:59.999BTC-USDC
1220962.4221163.2420853.3621106.205596.263032022-06-21 12:59:59.999BTC-USDT
1321079.4121629.3620968.2121592.69507.911902022-06-21 13:59:59.999BTC-USDC
1321106.1921650.0020995.6421619.318667.590802022-06-21 13:59:59.999BTC-USDT
1421619.3121723.0021427.8221590.796295.024292022-06-21 14:59:59.999BTC-USDT
1421595.1321699.9821394.0521572.00445.739782022-06-21 14:59:59.999BTC-USDC
1521571.9621671.3421314.9421370.87435.992372022-06-21 15:59:59.999BTC-USDC
1521590.7921604.2721339.0721392.193809.546222022-06-21 15:59:59.999BTC-USDT
1621370.0821533.1421330.0021377.52243.103682022-06-21 16:59:59.999BTC-USDC
1621392.1821550.3921355.7721401.522421.776292022-06-21 16:59:59.999BTC-USDT
1721377.8521434.5821168.5921220.14291.461372022-06-21 17:59:59.999BTC-USDC
1721401.5221457.8221195.7021242.943755.829192022-06-21 17:59:59.999BTC-USDT
1821242.9321256.9921076.4821100.012820.406752022-06-21 18:59:59.999BTC-USDT
1821220.1421233.9421054.7221074.20423.058362022-06-21 18:59:59.999BTC-USDC
1921074.8121279.5220851.1020866.39266.464882022-06-21 19:59:59.999BTC-USDC
1921100.0021306.5120870.0120888.644015.335282022-06-21 19:59:59.999BTC-USDT
2020864.3120960.9820645.0320838.52330.795692022-06-21 20:59:59.999BTC-USDC
2020888.6320987.3820666.0020859.864442.875962022-06-21 20:59:59.999BTC-USDT
2120838.5121057.0120780.2920958.2299.098362022-06-21 21:59:59.999BTC-USDC
2120859.8621054.9920808.0020972.911813.562362022-06-21 21:59:59.999BTC-USDT
2220950.0720975.9220719.0220875.37177.082032022-06-21 22:59:59.999BTC-USDC
2220972.9121003.7020741.0320897.002945.616502022-06-21 22:59:59.999BTC-USDT
2320897.0020943.1720551.0020723.522613.774412022-06-21 23:59:59.999BTC-USDT
2320880.7120916.8520527.9020699.78173.227972022-06-21 23:59:59.999BTC-USDC
\n", - "
" - ], - "text/plain": [ - " Open High Low Close Volume \\\n", - "0 20573.90 20705.74 20396.77 20672.30 4235.77392 \n", - "0 20549.65 20703.08 20371.75 20647.35 284.73521 \n", - "1 20648.06 20771.07 20482.83 20646.88 192.50513 \n", - "1 20672.30 20783.99 20504.89 20670.88 3892.91412 \n", - "2 20670.87 20699.93 20348.40 20433.49 2876.88928 \n", - "2 20644.78 20672.98 20331.36 20406.71 195.67023 \n", - "3 20433.49 20665.26 20365.00 20614.04 3306.77018 \n", - "3 20402.27 20649.99 20331.48 20585.98 290.39210 \n", - "4 20614.04 20740.72 20474.41 20656.17 2925.33542 \n", - "4 20586.63 20721.10 20449.57 20632.33 205.78459 \n", - "5 20656.17 21029.93 20621.14 20890.77 6516.65611 \n", - "5 20640.33 20999.00 20594.58 20866.07 412.42477 \n", - "6 20890.77 21202.00 20890.77 21192.08 6114.51662 \n", - "6 20873.25 21178.00 20873.25 21169.64 524.25894 \n", - "7 21192.08 21333.00 20952.46 21120.28 5433.08603 \n", - "7 21166.58 21300.00 20913.40 21097.44 304.65113 \n", - "8 21097.20 21471.91 21023.64 21327.56 366.29201 \n", - "8 21120.28 21500.01 21051.25 21356.32 6241.36801 \n", - "9 21342.76 21448.22 21144.24 21182.12 350.77543 \n", - "9 21356.32 21470.00 21166.94 21200.00 4961.57496 \n", - "10 21175.75 21298.52 21038.30 21270.98 526.66602 \n", - "10 21200.01 21307.83 21061.54 21299.27 3982.47578 \n", - "11 21273.73 21336.94 20887.17 20948.96 579.00200 \n", - "11 21299.02 21361.03 20911.13 20962.42 4685.42524 \n", - "12 20936.59 21143.99 20800.00 21079.42 452.07214 \n", - "12 20962.42 21163.24 20853.36 21106.20 5596.26303 \n", - "13 21079.41 21629.36 20968.21 21592.69 507.91190 \n", - "13 21106.19 21650.00 20995.64 21619.31 8667.59080 \n", - "14 21619.31 21723.00 21427.82 21590.79 6295.02429 \n", - "14 21595.13 21699.98 21394.05 21572.00 445.73978 \n", - "15 21571.96 21671.34 21314.94 21370.87 435.99237 \n", - "15 21590.79 21604.27 21339.07 21392.19 3809.54622 \n", - "16 21370.08 21533.14 21330.00 21377.52 243.10368 \n", - "16 21392.18 21550.39 21355.77 21401.52 2421.77629 \n", - 
"17 21377.85 21434.58 21168.59 21220.14 291.46137 \n", - "17 21401.52 21457.82 21195.70 21242.94 3755.82919 \n", - "18 21242.93 21256.99 21076.48 21100.01 2820.40675 \n", - "18 21220.14 21233.94 21054.72 21074.20 423.05836 \n", - "19 21074.81 21279.52 20851.10 20866.39 266.46488 \n", - "19 21100.00 21306.51 20870.01 20888.64 4015.33528 \n", - "20 20864.31 20960.98 20645.03 20838.52 330.79569 \n", - "20 20888.63 20987.38 20666.00 20859.86 4442.87596 \n", - "21 20838.51 21057.01 20780.29 20958.22 99.09836 \n", - "21 20859.86 21054.99 20808.00 20972.91 1813.56236 \n", - "22 20950.07 20975.92 20719.02 20875.37 177.08203 \n", - "22 20972.91 21003.70 20741.03 20897.00 2945.61650 \n", - "23 20897.00 20943.17 20551.00 20723.52 2613.77441 \n", - "23 20880.71 20916.85 20527.90 20699.78 173.22797 \n", - "\n", - " Close_time pair \n", - "0 2022-06-21 00:59:59.999 BTC-USDT \n", - "0 2022-06-21 00:59:59.999 BTC-USDC \n", - "1 2022-06-21 01:59:59.999 BTC-USDC \n", - "1 2022-06-21 01:59:59.999 BTC-USDT \n", - "2 2022-06-21 02:59:59.999 BTC-USDT \n", - "2 2022-06-21 02:59:59.999 BTC-USDC \n", - "3 2022-06-21 03:59:59.999 BTC-USDT \n", - "3 2022-06-21 03:59:59.999 BTC-USDC \n", - "4 2022-06-21 04:59:59.999 BTC-USDT \n", - "4 2022-06-21 04:59:59.999 BTC-USDC \n", - "5 2022-06-21 05:59:59.999 BTC-USDT \n", - "5 2022-06-21 05:59:59.999 BTC-USDC \n", - "6 2022-06-21 06:59:59.999 BTC-USDT \n", - "6 2022-06-21 06:59:59.999 BTC-USDC \n", - "7 2022-06-21 07:59:59.999 BTC-USDT \n", - "7 2022-06-21 07:59:59.999 BTC-USDC \n", - "8 2022-06-21 08:59:59.999 BTC-USDC \n", - "8 2022-06-21 08:59:59.999 BTC-USDT \n", - "9 2022-06-21 09:59:59.999 BTC-USDC \n", - "9 2022-06-21 09:59:59.999 BTC-USDT \n", - "10 2022-06-21 10:59:59.999 BTC-USDC \n", - "10 2022-06-21 10:59:59.999 BTC-USDT \n", - "11 2022-06-21 11:59:59.999 BTC-USDC \n", - "11 2022-06-21 11:59:59.999 BTC-USDT \n", - "12 2022-06-21 12:59:59.999 BTC-USDC \n", - "12 2022-06-21 12:59:59.999 BTC-USDT \n", - "13 2022-06-21 13:59:59.999 BTC-USDC 
\n", - "13 2022-06-21 13:59:59.999 BTC-USDT \n", - "14 2022-06-21 14:59:59.999 BTC-USDT \n", - "14 2022-06-21 14:59:59.999 BTC-USDC \n", - "15 2022-06-21 15:59:59.999 BTC-USDC \n", - "15 2022-06-21 15:59:59.999 BTC-USDT \n", - "16 2022-06-21 16:59:59.999 BTC-USDC \n", - "16 2022-06-21 16:59:59.999 BTC-USDT \n", - "17 2022-06-21 17:59:59.999 BTC-USDC \n", - "17 2022-06-21 17:59:59.999 BTC-USDT \n", - "18 2022-06-21 18:59:59.999 BTC-USDT \n", - "18 2022-06-21 18:59:59.999 BTC-USDC \n", - "19 2022-06-21 19:59:59.999 BTC-USDC \n", - "19 2022-06-21 19:59:59.999 BTC-USDT \n", - "20 2022-06-21 20:59:59.999 BTC-USDC \n", - "20 2022-06-21 20:59:59.999 BTC-USDT \n", - "21 2022-06-21 21:59:59.999 BTC-USDC \n", - "21 2022-06-21 21:59:59.999 BTC-USDT \n", - "22 2022-06-21 22:59:59.999 BTC-USDC \n", - "22 2022-06-21 22:59:59.999 BTC-USDT \n", - "23 2022-06-21 23:59:59.999 BTC-USDT \n", - "23 2022-06-21 23:59:59.999 BTC-USDC " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "btcusdt_1h_df['pair'] = 'BTC-USDT'\n", - "btcusdc_1h_df['pair'] = 'BTC-USDC'\n", - "\n", - "# Join datasets\n", - "candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n", - "\n", - "# Validate result\n", - "assert(\n", - " isinstance(candles_1h_df, pd.DataFrame)\n", - " and candles_1h_df.shape == (48, 7)\n", - " and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n", - ")\n", - "\n", - "# Sort output by Close_time\n", - "candles_1h_df.sort_values('Close_time')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 4: Advanced analytics and visualization\n", - "\n", - "Great! We have a dataset with 2 pairs. Let's think about what interesting things can be found there :bulb: and try to visualize it.\n", - "Feel free to use your favorite framework(s) for visualization." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "Great job! 
Thanks for your work and ideas. I hope it was an exciting journey!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.13 ('base')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}