2022-03-29 08:42:43 +00:00
|
|
|
---
|
|
|
|
title: "Currencies Analysis"
|
|
|
|
output: github_document
|
|
|
|
---
|
|
|
|
|
|
|
|
```{r setup, include=FALSE}
|
|
|
|
# Global chunk defaults: echo the source of every chunk and hide warnings.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
|
|
|
|
```
|
|
|
|
|
|
|
|
***Price analysis of my list of ~~the most promising cryptotokens~~ currencies.***
|
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
## Prepare
|
|
|
|
|
|
|
|
Install packages and set up the environment :earth_asia:
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
`install.packages("azuremlsdk")`
|
|
|
|
|
|
|
|
```{r set_envinroment, message=FALSE}
|
2022-03-29 09:12:29 +00:00
|
|
|
suppressPackageStartupMessages({
|
|
|
|
library(dplyr)
|
|
|
|
library(tidyr)
|
|
|
|
|
|
|
|
library(lubridate)
|
|
|
|
library(stringr)
|
|
|
|
|
|
|
|
library(gt)
|
|
|
|
library(tidyverse)
|
|
|
|
library(glue)
|
|
|
|
|
|
|
|
library(ggplot2)
|
|
|
|
|
|
|
|
library(azuremlsdk)
|
|
|
|
})
|
|
|
|
```
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
```{r set_params}
|
|
|
|
# Name of the dataset that is later looked up in the Azure ML workspace.
.azureml_dataset_name <- "Currencies"
|
|
|
|
```
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
Connect to Azure ML workspace:
|
2022-03-29 08:42:43 +00:00
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
```{r azureml_connect}
|
2022-03-29 08:42:43 +00:00
|
|
|
# Obtain the workspace handle from the Azure ML configuration.
ws <- azuremlsdk::load_workspace_from_config()

# Show which workspace was loaded and the region it reports.
sprintf("%s workspace located in %s region", ws$name, ws$location)
|
|
|
|
```
|
|
|
|
|
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
## Load dataset
|
|
|
|
|
2022-03-29 08:42:43 +00:00
|
|
|
```{r get_azure_dataset}
|
|
|
|
# Fetch the dataset by its configured name from the connected workspace.
currencies_ds <- azuremlsdk::get_dataset_by_name(
  ws,
  name = .azureml_dataset_name
)

# Print the dataset's name and description.
currencies_ds$name
currencies_ds$description
|
|
|
|
```
|
2022-03-29 09:12:29 +00:00
|
|
|
Get the top 10 highest USD/RUB rates:
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
```{r prepare_dataframe}
|
|
|
|
# Materialise the Azure ML dataset as a data frame.
quotes_df <- currencies_ds$to_pandas_dataframe()

# ~ 20 years, 150 currencies and 1.5M rows

# Render the ten top-ranked USD/RUB rows as a formatted table.
# NOTE(review): top_n() is called without `wt`, so it ranks by the last
# column of the frame -- confirm that this is the intended column.
quotes_df %>%
  filter(slug == "USD/RUB") %>%
  select(-slug) %>%
  top_n(10) %>%
  gt() %>%
  tab_header(
    title = "USD/RUB Rate",
    # The subtitle spans the date range of the whole data set.
    subtitle = glue("{min(quotes_df$date)} to {max(quotes_df$date)}")
  ) %>%
  fmt_date(columns = date, date_style = 6) %>%
  fmt_number(columns = c(open, high, low, close))
|
|
|
|
```
|
|
|
|
|
2022-03-29 09:12:29 +00:00
|
|
|
## Preprocessing data
|
|
|
|
|
|
|
|
Calculate `Return` and `Log Return` for the last 10 years:
|
|
|
|
|
2022-03-29 08:42:43 +00:00
|
|
|
```{r preprocessing}
|
|
|
|
# Reduce the quotes to USD-based pairs over the last 10 years and derive
# the simple and log return of every symbol.
#
# Plain assignment replaces magrittr's `%<>%`: that operator is only
# available when magrittr itself is attached, which this document never does.
quotes_df <- quotes_df %>%
  transmute(
    symbol = slug,
    price = close,
    date
  ) %>%
  filter(
    str_detect(symbol, "USD/"),
    # `%m-%` rolls back to the last valid day of the month, so a maximum date
    # of 29 February does not turn the cutoff (and the whole filter) into NA.
    date > max(date) %m-% lubridate::years(10)
  ) %>%
  # Discard USD/RUB quotes below 1.
  filter(!(symbol == "USD/RUB" & price < 1)) %>%
  # Sort by date first so that diff()/lag() compare consecutive quotes.
  arrange(date) %>%
  group_by(symbol) %>%
  mutate(
    return = c(NA_real_, diff(price)) / lag(price),
    log_return = log(1 + return)
  ) %>%
  # Drops the first row of each symbol (no previous price) together with any
  # other incomplete row. The result stays grouped by symbol.
  na.omit()
|
|
|
|
```
|
|
|
|
|
2022-03-29 09:54:55 +00:00
|
|
|
## Discover Data
|
|
|
|
|
|
|
|
Calculate statistics and `volatility`:
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
```{r discover}
|
|
|
|
# Summary statistics per group of quotes_df: price extremes, the last price,
# the max/min ratio and the standard deviation of the log returns.
quotes_stats <- summarise(
  quotes_df,
  max_price = max(price),
  min_price = min(price),
  last_price = last(price),
  max_min_rate = max_price / min_price,
  volatility = sd(log_return)
)

# Table ordered from the least to the most volatile symbol; volatility is
# displayed multiplied by 100.
quotes_stats %>%
  arrange(volatility) %>%
  mutate(`100x Volatility` = volatility * 100) %>%
  select(-volatility) %>%
  gt() %>%
  tab_header(
    title = "The Least and The Most Volatile Currencies",
    subtitle = glue("{min(quotes_df$date)} to {max(quotes_df$date)}")
  ) %>%
  fmt_number(
    columns = c(
      max_price, min_price, max_min_rate, last_price, `100x Volatility`
    )
  )
|
|
|
|
```
|
2022-03-29 09:54:55 +00:00
|
|
|
My broker trades the following pairs:
|
2022-03-29 08:42:43 +00:00
|
|
|
|
|
|
|
```{r}
|
|
|
|
# Pairs of interest, written as "USD/<currency>".
symbols <- paste0(
  "USD/",
  c(
    "RUB",
    "EUR", "GBP", "CHF", "CNY", "HKD", "JPY", "SEK", "SGD", "AUD",
    "AED", "KZT", "BYN", "TRY", "MXN"
  )
)

# Same statistics table as before, restricted to the pairs listed above.
quotes_stats %>%
  filter(symbol %in% symbols) %>%
  arrange(volatility) %>%
  mutate(`100x Volatility` = volatility * 100) %>%
  select(-volatility) %>%
  gt() %>%
  tab_header(
    title = "The Most Promised Currencies",
    subtitle = glue("{min(quotes_df$date)} to {max(quotes_df$date)}")
  ) %>%
  fmt_number(
    columns = c(
      max_price, min_price, last_price, max_min_rate, `100x Volatility`
    )
  )
|
|
|
|
|
|
|
|
```
|
|
|
|
Plot exchange rates for our favorites:
|
|
|
|
|
|
|
|
Define low risk symbols:
|
|
|
|
|
|
|
|
```{r}
|
2022-03-29 09:54:55 +00:00
|
|
|
# Volatility of USD/RUB, used as the risk threshold.
usdrub_vol <- quotes_stats %>%
  filter(symbol == "USD/RUB") %>%
  pull(volatility)

# Listed pairs whose volatility does not exceed that of USD/RUB.
low_risk_symbols <- quotes_stats %>%
  filter(symbol %in% symbols, volatility <= usdrub_vol) %>%
  pull(symbol) %>%
  unique()

# Print the selection as a bracketed, single-quoted list.
cat("['", paste(low_risk_symbols, collapse = "', '"), "']", sep = "")
|
|
|
|
```
|
|
|
|
|
|
|
|
|
2022-03-29 09:54:55 +00:00
|
|
|
```{r}
|
|
|
|
# Symbols whose maximum price is more than twice their minimum price.
jumper_symbols <- quotes_stats %>%
  filter(max_min_rate > 2) %>%
  pull(symbol)

# Plot the cumulative return of every low-risk symbol, split into one panel
# for the "jumper" symbols and one for the rest.
quotes_df %>%
  filter(symbol %in% low_risk_symbols) %>%
  mutate(
    jumper = if_else(
      symbol %in% jumper_symbols,
      "High risk currencies",
      "Low risk currencies"
    )
  ) %>%
  group_by(symbol) %>%
  # Simple returns compound multiplicatively, so the cumulative return is
  # prod(1 + r) - 1; summing them with cumsum() misstates the result.
  mutate(R = cumprod(1 + return) - 1) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = date, y = R, color = symbol)) +
  facet_grid(jumper ~ ., scales = "free") +
  labs(
    x = "",
    y = "Return of Investment",
    title = "Currencies Exchange Rates",
    subtitle = "Return of Investment for last 10 years"
  ) +
  theme_bw()
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2022-03-29 08:42:43 +00:00
|
|
|
|