1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
# Importing libraries
!python -m pip install yahoofinancials
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from yahoofinancials import YahooFinancials
# Load the instrument list from the workbook. Two of its columns are read
# below: 'Ticker' (the symbols later passed to YahooFinancials) and
# 'Description' (the labels later used as column names).
ticker_details = pd.read_excel("Ticker List.xlsx")
ticker_details.head(20)  # notebook preview of the first 20 rows
ticker, names = (
    ticker_details[column].to_list() for column in ('Ticker', 'Description')
)
# Build the master calendar: one row per business day between start_date and
# end_date (inclusive). Each instrument's prices are later joined onto this
# 'Date' column.
start_date = "2009-01-01"
end_date = "2020-09-16"
date_range = pd.bdate_range(start=start_date, end=end_date)
# The join key is stored as a datetime64 column.
values = pd.DataFrame({'Date': pd.to_datetime(date_range)})
# Download the daily price history of every ticker from Yahoo Finance and
# attach its adjusted close to `values` as a new column named after the
# ticker. Rows are matched on the date; the left join keeps every calendar
# row even when an instrument has no quote for that day.
for symbol in ticker:
    history = YahooFinancials(symbol).get_historical_price_data(
        start_date, end_date, "daily"
    )
    quotes = pd.DataFrame(history[symbol]['prices'])[
        ['formatted_date', 'adjclose']
    ].rename(columns={'formatted_date': 'Date1', 'adjclose': symbol})
    # The dates arrive as text; convert them so they match values['Date'].
    quotes['Date1'] = pd.to_datetime(quotes['Date1'])
    values = values.merge(
        quotes, how='left', left_on='Date', right_on='Date1'
    ).drop(columns='Date1')
# Relabel the price columns with instrument names instead of ticker codes,
# for readability. 'Date' is prepended only if it is not already the first
# label: an unconditional insert made this step non-repeatable, because a
# second run added another 'Date' and the assignment below then failed on a
# length mismatch.
if names[:1] != ['Date']:
    names.insert(0, 'Date')
values.columns = names
print(values.shape)
# Number of missing prices per instrument, before any filling.
print(values.isna().sum())
values.tail()
# Fill the NaN values in the data set: carry the last known value forward,
# then back-fill whatever is still missing at the top of each column.
# DataFrame.ffill()/bfill() replace fillna(method=...), which is deprecated
# in current pandas; the result is the same.
values = values.ffill(axis=0).bfill(axis=0)
values.isna().sum()
# Force every column except 'Date' to a numeric dtype. Entries that cannot
# be parsed become NaN (errors='coerce'); the results are rounded to one
# decimal place before being written back.
cols = values.columns.drop('Date')
numeric_prices = values[cols].apply(pd.to_numeric, errors='coerce')
values[cols] = numeric_prices.round(decimals=1)
values.tail()
# Persist the cleaned price table.
values.to_csv("Training Data_Values.csv")
# Subset of instruments used for the long-term return features further down.
imp = ['Gold', 'Brent Futures', 'CAC 40', 'Microsoft']

# Short-term historical returns: for every instrument and every look-back in
# change_days, the percentage change over that many rows is added as a
# column named '<instrument>-T-<look-back>'.
change_days = [1, 3, 5, 14, 21]
data = pd.DataFrame(data=values['Date'])
for lookback in change_days:
    print(data.shape)
    trailing_returns = (
        values[cols].pct_change(periods=lookback).add_suffix(f"-T-{lookback}")
    )
    data = pd.concat(objs=(data, trailing_returns), axis=1)
print(data.shape)
# Long-term historical returns, computed only for the instruments listed in
# `imp`. Columns follow the same '<instrument>-T-<look-back>' naming.
change_days = [60, 90, 180, 250]
for lookback in change_days:
    print(data.shape)
    trailing_returns = (
        values[imp].pct_change(periods=lookback).add_suffix(f"-T-{lookback}")
    )
    data = pd.concat(objs=(data, trailing_returns), axis=1)
print(data.shape)
# Moving-average features for Microsoft: the relative distance of the price
# from its own moving average, (price / average) - 1, for several simple
# (SMA) and exponential (EMA) windows.
moving_avg = pd.DataFrame(values['Date'], columns=['Date'])
moving_avg['Date'] = pd.to_datetime(moving_avg['Date'], format='%Y-%b-%d')
msft = values['Microsoft']
for window in (15, 30, 60, 90, 180):
    simple_avg = msft.rolling(window=window).mean()
    moving_avg[f'Microsoft/{window}SMA'] = (msft / simple_avg) - 1
for span in (90, 180):
    exp_avg = msft.ewm(span=span, adjust=True, ignore_na=True).mean()
    moving_avg[f'Microsoft/{span}EMA'] = (msft / exp_avg) - 1
# The leading rows have no rolling mean yet (fewer observations than the
# longest window); drop every row that still contains a NaN.
moving_avg = moving_avg.dropna(axis=0)
print(moving_avg.shape)
moving_avg.head()
# Attach the moving-average features to the feature table. The left join on
# 'Date' keeps every row of `data`; dates missing from `moving_avg` get NaN
# in the new columns.
print(data.shape)
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%b-%d')
data = data.merge(moving_avg, how='left', on='Date')
print(data.shape)
data.isna().sum()
# Calculating forward returns for the target.
# The forward return over k rows is P[t+k] / P[t] - 1. The earlier
# pct_change(periods=-k) evaluates P[t] / P[t+k] - 1 instead, which has the
# opposite sign, so the future price is shifted back explicitly here.
# The last k rows have no future price and stay NaN.
y = pd.DataFrame(data=values['Date'])
print(y.shape)
msft_close = values["Microsoft"]
y['Microsoft-T+14'] = msft_close.shift(-14) / msft_close - 1
y['Microsoft-T+22'] = msft_close.shift(-22) / msft_close - 1
print(y.shape)
y.isna().sum()
# Remove rows with missing values at either end of the sample: feature rows
# without a 250-row trailing return, and target rows without a 22-row
# forward return.
print(data.shape)
has_full_history = data['Microsoft-T-250'].notna()
data = data[has_full_history]
y = y.dropna(subset=['Microsoft-T+22'])
print(data.shape)
print(y.shape)
# Append the target columns to the features. The inner join on 'Date' keeps
# only dates present in both tables.
data = data.merge(y, how='inner', on='Date', suffixes=(False, False))
print(data.shape)
data.isna().sum()
# Save the final training table without the row index.
data.to_csv("Training Data.csv", index=False)
# Correlation of every feature with the two target columns.
# 'Date' is dropped explicitly before calling corr(): pandas >= 2.0 no
# longer discards non-numeric columns silently, so a datetime column can make
# corr() fail. Older pandas ignored the column anyway, so results there are
# unchanged.
corr = (
    data.drop(columns='Date')
    .corr()
    .iloc[:, -2:]
    .drop(labels=['Microsoft-T+14', 'Microsoft-T+22'], axis=0)
)
import seaborn as sns
import numpy as np
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept
# so the plot looks the same as before.
sns.distplot(corr.iloc[:, 0])
pd.set_option('display.max_rows', None)
# Same analysis restricted to the 2000 most recent rows, 14-row target only.
corr_data = data.tail(2000).drop(columns='Date').corr()
corr_data = pd.DataFrame(corr_data['Microsoft-T+14'])
#corr_data = corr_data.iloc[3:,]
corr_data = corr_data.sort_values('Microsoft-T+14', ascending=False)
#corr_data
sns.distplot(corr_data)
data = pd.read_csv("Training Data.csv")
!pip install matplotlib-venn
!apt-get -qq install -y libfluidsynth1
from google.colab import drive
drive.mount('/content/drive')
!pip install pycaret
from pycaret.utils import enable_colab
enable_colab()
data_22= data.drop(['Microsoft-T+14'],axis=1)
data_22.head()
a=setup(data_22,target='Gold-T+22',
ignore_features=['Date'],session_id=11,
silent=True,profile=False); |
Partager