# lib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns;sns.set()
# Apply the 'tableau-colorblind10' matplotlib style to all figures below.
plt.style.use('tableau-colorblind10')
# different models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score
from features import tautil
from labeling import labeling
from mlutil.pkfold import PKFold
머신러닝을 이용한 트레이딩: (5) 매매 시그널 분류
Get Trading Signals
trading
signals
classification
machine learning
모멘텀 분류기 (Momentum Classifier)
inputs
- labels: trend-scanning labeling (up vs. down & no trend)
- features: market-data selected features
models: SVM, Random Forest, Gradient Boosting, LSTM
outputs
- momentum signals
import warnings
# Install a global "ignore" filter so no warning is shown from here on.
warnings.filterwarnings(action='ignore')
get X,y
# Load the market data and name its first column 'Date'.
market_df = pd.read_csv('C:data/market_samsung.csv')
market_df = market_df.rename(columns={market_df.columns[0]:'Date'})
# Index the frame by the parsed dates, then drop the now-redundant column
# and every row that contains a missing value.
market_df.index = pd.to_datetime(market_df.Date)
market_df.drop(columns='Date',inplace=True)
market_df.dropna(inplace=True)
# Load the feature table and name its first column 'Date'.
feature_df = pd.read_csv('C:data/features_samsung.csv')
feature_df = feature_df.rename(columns={feature_df.columns[0]:'Date'})
# Index the frame by the parsed dates, then drop the now-redundant column
# and every row that contains a missing value.
feature_df.index = pd.to_datetime(feature_df.Date)
feature_df.drop(columns='Date',inplace=True)
feature_df.dropna(inplace=True)
# The selected feature names are the header of the CSV, minus its first column.
selected_features = pd.read_csv('C:data/selected_features.csv').columns[1:]

# Restrict the feature table to the selected columns.
feature = feature_df.dropna()
feature = feature[selected_features]

# Standardise the features; fit_transform returns a bare array, so wrap it
# back into a DataFrame to keep the original index and column names.
sc = StandardScaler()
X_sc = sc.fit_transform(feature)
X_sc = pd.DataFrame(X_sc, index=feature.index, columns=feature.columns)
# Draw one small line plot per selected feature, titled with the column name.
for i in feature.columns:
    plt.figure(figsize=(10,1))
    plt.title(i)
    plt.plot(feature[i])
#labeling
trend_scanning_window = 60
trend_scanning_q = 3
# Label the close prices from 2010 onwards with the project's
# trend-scanning routine; element 0 of its result is used as the label.
ts_out = labeling.trend_scanning_label(market_df['2010':].close, window = trend_scanning_window, q = trend_scanning_q)
mom_label = ts_out[0]
# up-trend vs. others(down-trend and no-trend)
# NOTE(review): yields 1 only for label 1 and 0 for anything smaller,
# assuming labels take values in {-1, 0, 1} — confirm against `labeling`.
y = np.sign(mom_label-1)+1
# Plot the labels up to 2020 against the matching close prices.
y_ = y[:'2020']
close = market_df.close.loc[y_.index]

# Two stacked axes: a tall price panel and a short label panel (5:1 heights).
f, (a0, a1) = plt.subplots(2, gridspec_kw={'height_ratios': [5, 1]}, figsize=(15,5))
f.suptitle("Trend Scanning Labels: 1(up-trend), 0(down & no-trend)")
a0.plot(close,alpha=0.2)
# Colour each price point by its label.
a0.scatter(close.index,close,c=y_, cmap='vlag')
# Missing labels are drawn at 0.5, halfway between the two classes.
a1.plot(y_.fillna(0.5))
f.show()
raw_X = X_sc.copy()

# Join the labels onto the scaled features and drop incomplete rows, so that
# X and y end up sharing the same index. The label is the last column.
tmp = raw_X.join(y).dropna()
X = tmp.iloc[:,:-1]
y = tmp.iloc[:,-1]
Models & Models with Hyperparameter tuning
# Cross Validation (purged k-fold)
n_cv = 4
# Element 1 of the trend-scanning output, restricted to the rows kept in X.
t1 = ts_out[1].loc[X.index]
# NOTE(review): the third argument (0.01) is presumably the embargo
# fraction — confirm against mlutil.pkfold.PKFold.
cv = PKFold(n_cv,t1,0.01)
# Choose model (SVM-rbf)
# Grid-search the regularisation strength C using the cross-validation
# splitter `cv` defined above.
C = [0.1, 1, 10]
param_grid_rbf = dict(C=C)
# probability=True is needed so the fitted model exposes predict_proba.
svc_rbf = SVC(kernel='rbf', probability=True)
gs_svc_rbf = GridSearchCV(estimator=svc_rbf, param_grid= param_grid_rbf, cv=cv)
gs_svc_rbf.fit(X,y)
svc_best = gs_svc_rbf.best_estimator_
# Bare expression: displays the chosen estimator when run as a notebook cell.
svc_best
SVC(C=0.1, probability=True)
# Random forest: grid-search the number of trees and the tree depth using
# the cross-validation splitter `cv` defined above.
n_estimators = [500,1000]
max_depth = [3,5]
param_grid_rfc = dict(n_estimators=n_estimators, max_depth=max_depth)
rfc = RandomForestClassifier(class_weight='balanced')
gs_rfc = GridSearchCV(estimator=rfc, param_grid= param_grid_rfc, cv=cv)
gs_rfc.fit(X,y)
rfc_best = gs_rfc.best_estimator_
# Bare expression: displays the chosen estimator when run as a notebook cell.
rfc_best
RandomForestClassifier(class_weight='balanced', max_depth=5, n_estimators=1000)
# AdaBoost: grid-search the number of estimators and the learning rate using
# the cross-validation splitter `cv` defined above.
n_estimators_ab = [200,500]
learning_rate = [0.01,0.1]
param_grid_abc = dict(n_estimators=n_estimators_ab, learning_rate=learning_rate)

abc=AdaBoostClassifier()
gs_abc= GridSearchCV(estimator=abc, param_grid= param_grid_abc, cv=cv)
gs_abc.fit(X,y)
ada_best = gs_abc.best_estimator_
# Bare expression: displays the chosen estimator when run as a notebook cell.
ada_best
AdaBoostClassifier(learning_rate=0.01, n_estimators=200)
# Gradient boosting: grid-search the number of estimators and the learning
# rate using the cross-validation splitter `cv` defined above.
n_estimators_gb = [200,500]
learning_rate = [0.01,0.1]
param_grid_gbc = dict(n_estimators=n_estimators_gb, learning_rate=learning_rate)
gbc=GradientBoostingClassifier()
gs_gbc= GridSearchCV(estimator=gbc, param_grid= param_grid_gbc, cv=cv)
gs_gbc.fit(X,y)
gbc_best = gs_gbc.best_estimator_
# Bare expression: displays the chosen estimator when run as a notebook cell.
gbc_best
GradientBoostingClassifier(n_estimators=200)
# Tuned classifiers and their display names, in matching order.
clf_list = [svc_best, rfc_best, ada_best, gbc_best]
estimators=['SVM_best','RF_best','AdaBoost_best','GradientBoost_best']
# Filled by the evaluation loop: one entry per classifier.
scores_list= []
y_preds_list = []
y_probs_list = []
# for ML model prediction
# Re-fit every tuned classifier on each training split of `cv`, collect its
# predictions on the matching test split, and score the pooled predictions.
for clf in clf_list:
    y_preds_ = []
    y_probs_ = []

    for train, test in cv.split(X, y):
        clf.fit(X.iloc[train], y.iloc[train])
        y_true = y.iloc[test]
        y_pred = clf.predict(X.iloc[test])
        y_probs = clf.predict_proba(X.iloc[test])
        # Keep column 1 only, i.e. the probability of the second class in
        # clf.classes_.
        y_probs = y_probs[:, 1]
        # `train`/`test` are used positionally (see the .iloc calls above),
        # so the index must also be taken positionally: plain `y[test]` on a
        # label-indexed Series relies on a deprecated integer fallback.
        y_pred_series = pd.Series(y_pred,index=y.iloc[test].index)
        y_probs_series = pd.Series(y_probs,index=y.iloc[test].index)

        y_preds_.append(y_pred_series)
        y_probs_.append(y_probs_series)

    # Stitch the per-fold predictions into one series per classifier.
    y_preds__ = pd.concat(y_preds_)
    y_probs__ = pd.concat(y_probs_)
    y_true__ = y.loc[y_preds__.index]

    accs = accuracy_score(y_true__, y_preds__)
    f1=f1_score(y_true__, y_preds__)
    # ROC AUC is computed from probabilities, the other metrics from labels.
    roc=roc_auc_score(y_true__, y_probs__)
    prec=precision_score(y_true__, y_preds__)
    score= [accs, f1, roc, prec]

    scores_list.append(score)
    y_preds_list.append(y_preds__)
    y_probs_list.append(y_probs__)
# One row per classifier, one column per metric; best accuracy first.
results = pd.DataFrame(scores_list, columns=['accuracy','f1 score','roc auc score','precision score'],index=estimators)
result_show = results.sort_values('accuracy', ascending=False)
# Bare expression: displays the table when run as a notebook cell.
result_show
| | accuracy | f1 score | roc auc score | precision score |
|---|---|---|---|---|
| AdaBoost_best | 0.563135 | 0.123726 | 0.448611 | 0.412621 |
| SVM_best | 0.510160 | 0.299793 | 0.476269 | 0.380263 |
| GradientBoost_best | 0.493832 | 0.468166 | 0.489565 | 0.421993 |
| RF_best | 0.488389 | 0.503871 | 0.498126 | 0.427718 |
# Put each classifier's predicted probabilities side by side (one column per
# model) and keep only the dates for which every model has a prediction.
y_probs_df = pd.concat(y_probs_list, axis=1).dropna()
y_probs_df.columns = estimators

# The momentum signal is the plain average of the models' probabilities.
y_probs_df['mean_'] = y_probs_df.mean(axis=1)

momentum = pd.Series(y_probs_df.mean_,index=y_probs_df.index)
Select the model
# Histogram of the signal; element 1 of plt.hist's result is the bin edges,
# which are displayed when this runs as a notebook cell.
plt.hist(momentum)[1]
array([0.15405224, 0.22668779, 0.29932334, 0.37195889, 0.44459443,
0.51722998, 0.58986553, 0.66250108, 0.73513662, 0.80777217,
0.88040772])
# Restrict the signal to 2010-2020 and fetch the matching close prices.
momentum = momentum.loc['2010':'2020']
close = market_df.close.loc[momentum.index]
plt.figure(figsize=(10,4))
plt.plot(close, alpha=0.2)
#plt.title('Momentum signals')
# Shade each price point by its signal on a reversed grey scale fixed to
# [0, 1], so higher values are drawn darker.
plt.scatter(momentum.index, close, c=momentum, s=10,cmap='gray_r',vmin=0,vmax=1)
plt.colorbar()
plt.legend(['price','darker = long signals'])
plt.show()
# Save the signal as a series named 'signals'.
momentum.rename('signals').to_csv('C:data/momentum_signals.csv')