mirror of
https://github.com/codez0mb1e/resistance.git
synced 2024-11-23 02:42:20 +00:00
commit
5cff2c0eda
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/data/*.csv
|
||||
/data/*.zip
|
337
src/binance_open_data.ipynb
Normal file
337
src/binance_open_data.ipynb
Normal file
@ -0,0 +1,337 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Binance Open Data lab"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Binance Open Data](https://github.com/binance/binance-public-data/#klines) and analyze it.\n",
|
||||
"\n",
|
||||
"### Stet 1. Download data \n",
|
||||
"\n",
|
||||
"Downloading __1-minute candles__ for `BTC/USDT` and `BTC/UDSC` using `bash` or `powershell` scripts:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!/bin/sh\n",
|
||||
"\n",
|
||||
"# create dir for data\n",
|
||||
"!mkdir ../data\n",
|
||||
"\n",
|
||||
"# download data using GET request\n",
|
||||
"!wget -N -P ../data https://data.binance.vision/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2022-06-21.zip\n",
|
||||
"!wget -N -P../data https://data.binance.vision/data/spot/daily/klines/BTCUSDC/1m/BTCUSDC-1m-2022-06-21.zip\n",
|
||||
"\n",
|
||||
"# unzip\n",
|
||||
"!unzip -o -d ../data ../data/BTCUSDT-1m-2022-06-21.zip \n",
|
||||
"!unzip -o -d ../data ../data/BTCUSDC-1m-2022-06-21.zip"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Step 2: Import data to Dataframe \n",
|
||||
"\n",
|
||||
"Import packages for data analysis:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import httpx\n",
|
||||
"\n",
|
||||
"from datetime import datetime"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Import data from CSV file to Pandas DataFrame:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_data(pair: str) -> pd.DataFrame:\n",
|
||||
" return pd.read_csv(f'../data/{pair}-1m-2022-06-21.csv', header = None)\n",
|
||||
"\n",
|
||||
"btcusdt_df = get_data('BTCUSDT')\n",
|
||||
"btcusdt_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set names to columns:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def set_column_names(df: pd.DataFrame) -> pd.DataFrame:\n",
|
||||
" column_names_mapping = {\n",
|
||||
" 0: 'Open_time',\n",
|
||||
" 1: 'Open',\n",
|
||||
" 2: 'High',\n",
|
||||
" 3: 'Low',\n",
|
||||
" 4: 'Close',\n",
|
||||
" 5: 'Volume',\n",
|
||||
" 6: 'Close_time',\n",
|
||||
" 7: 'Quote_asset_volume',\n",
|
||||
" 8: 'Number_of_trades',\n",
|
||||
" 9: 'Taker_buy_base_asset_volume',\n",
|
||||
" 10: 'Taker_buy_quote_asset_volume',\n",
|
||||
" 11: 'Ignore'\n",
|
||||
" }\n",
|
||||
" return df.rename(columns=column_names_mapping)\n",
|
||||
"\n",
|
||||
"btcusdt_df = set_column_names(btcusdt_df)\n",
|
||||
"btcusdt_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Convert timestamp to human-readable date and time format:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"btcusdt_df['Open_time'] = btcusdt_df.iloc[:, 0].apply(lambda t: datetime.fromtimestamp(t/1000))\n",
|
||||
"btcusdt_df['Close_time'] = btcusdt_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000))\n",
|
||||
"\n",
|
||||
"btcusdt_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's take a look at _Descriptive statistics_ (min, mean, max, standard deviation):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"btcusdt_df.describe(datetime_is_numeric=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Step 2: Transform data\n",
|
||||
"\n",
|
||||
"Calculate __1-hour OHLCV__ candles:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def calculate_ohclv(df: pd.DataFrame) -> pd.DataFrame:\n",
|
||||
" df['hour'] = df['Close_time'].apply(lambda t: t.hour)\n",
|
||||
"\n",
|
||||
" return (\n",
|
||||
" df\n",
|
||||
" .groupby(['hour'])\n",
|
||||
" .agg(\n",
|
||||
" {\n",
|
||||
" 'Open': 'first',\n",
|
||||
" 'High': max,\n",
|
||||
" 'Low': min,\n",
|
||||
" 'Close': 'last',\n",
|
||||
" 'Volume': sum,\n",
|
||||
" 'Close_time': max\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
" .reset_index()\n",
|
||||
" .drop(columns=['hour'])\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"btcusdt_1h_df = calculate_ohclv(btcusdt_df)\n",
|
||||
"\n",
|
||||
"btcusdt_1h_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Data validation is very important. Let's write domain-driven asserts:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(\n",
|
||||
" isinstance(btcusdt_1h_df, pd.DataFrame)\n",
|
||||
" and btcusdt_1h_df.shape == (24, 6)\n",
|
||||
" and not btcusdt_1h_df.isnull().any().any()\n",
|
||||
" and btcusdt_1h_df.iloc[:, 0:5].ge(0).all().all()\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Step 3: Expand the dataset with information about `BTC/USDC` \n",
|
||||
"\n",
|
||||
"Download `BTC/USDC` 1-minute candles and transform it to 1-hour candles:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"btcusdc_df = get_data('BTCUSDC') # download data\n",
|
||||
"btcusdc_df = set_column_names(btcusdc_df) # set column names\n",
|
||||
"btcusdc_df['Close_time'] = btcusdc_df.iloc[:, 6].apply(lambda t: datetime.fromtimestamp(t/1000)) # convert timestamp to date+time\n",
|
||||
"\n",
|
||||
"btcusdc_1h_df = calculate_ohclv(btcusdc_df) # calculate 1h OHCLV candles\n",
|
||||
"btcusdc_1h_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Join altogether:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"btcusdt_1h_df['pair'] = 'BTC-USDT'\n",
|
||||
"btcusdc_1h_df['pair'] = 'BTC-USDC'\n",
|
||||
"\n",
|
||||
"# Join datasets\n",
|
||||
"candles_1h_df = pd.concat([btcusdt_1h_df, btcusdc_1h_df])\n",
|
||||
"\n",
|
||||
"# Validate result\n",
|
||||
"assert(\n",
|
||||
" isinstance(candles_1h_df, pd.DataFrame)\n",
|
||||
" and candles_1h_df.shape == (48, 7)\n",
|
||||
" and (candles_1h_df['pair'].unique() == ['BTC-USDT', 'BTC-USDC']).all()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Sort output by Close_time\n",
|
||||
"candles_1h_df.sort_values('Close_time')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### (Optional) Use Binance API"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# %%\n",
|
||||
"import pandas as pd \n",
|
||||
"from binance.client import Client\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# %%\n",
|
||||
"api_key = \"****\"\n",
|
||||
"secret_key = \"***\"\n",
|
||||
"\n",
|
||||
"client = Client(api_key, secret_key)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# %%\n",
|
||||
"coins_response = client.get_all_coins_info()\n",
|
||||
"coins_df = pd.DataFrame.from_dict(coins_response, orient='columns')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# %%\n",
|
||||
"pairs_list = coins_df.coin.apply(lambda x: f\"{x}USDT\") \n",
|
||||
"client.get_historical_klines(\n",
|
||||
" 'BTCUSDT', \n",
|
||||
" interval=Client.KLINE_INTERVAL_1HOUR,\n",
|
||||
" start_str='2022-04-21', \n",
|
||||
" end_str='2022-04-22'\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.13 ('base')",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "6fd7ff10be7e3a66c1b3745c4cbc00041a2589eb74ab4be46a3698a7b56001aa"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue
Block a user