Part 2 Basic Descriptive Statistics of Financial Data

Objectives:

  • The price of a financial instrument over time constitutes a time series
  • Likewise, the return process of that instrument also constitutes a time series
  • We will be looking at descriptive and inferential statistics of those time series
    • Both for getting familiar with the methods and for
    • a better understanding of computational tools (that is, R for this course)

Getting the Data

# Attach quantmod, which supplies getSymbols()
library("quantmod")

# Download Amazon (AMZN) daily prices from the beginning of 2020 up to the
# `to` date of 2020-09-08. auto.assign = FALSE makes getSymbols() return the
# series, so it can be stored under a name of our choosing.
data.AMZN <- getSymbols(
  "AMZN",
  from = "2020-01-02",
  to = "2020-09-08",
  auto.assign = FALSE
)
Code language: R (r)
head(data.AMZN)
Code language: R (r)
           AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
2020-01-02   93.7500   94.9005  93.2075    94.9005    80580000       94.9005
2020-01-03   93.2250   94.3100  93.2250    93.7485    75288000       93.7485
2020-01-06   93.0000   95.1845  93.0000    95.1440    81236000       95.1440
2020-01-07   95.2250   95.6945  94.6020    95.3430    80898000       95.3430
2020-01-08   94.9020   95.5500  94.3220    94.5985    70160000       94.5985
2020-01-09   95.4945   95.8910  94.7900    95.0525    63346000       95.0525
tail(data.AMZN)
Code language: R (r)
           AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
2020-08-28  171.1500  171.6685 169.3250   170.0900    57940000      170.0900
2020-08-31  170.4495  174.7500 170.2500   172.5480    83718000      172.5480
2020-09-01  174.4790  175.6935 173.3500   174.9560    68644000      174.9560
2020-09-02  177.3500  177.6125 174.3345   176.5725    78630000      176.5725
2020-09-03  174.2500  174.4205 165.1500   168.4000   163222000      168.4000
2020-09-04  165.9000  169.0750 155.5565   164.7310   175636000      164.7310

Descriptive Statistics

Plotting the data

plot(data.AMZN$AMZN.Close)
Code language: R (r)

Some Commonly Used Indicators

Get longer time series

data.AMZN <- getSymbols("AMZN",from="2018-01-01",to="2020-09-08",auto.assign = FALSE)
Code language: R (r)

Get only closing price of data

AMZN.sma <- data.AMZN[,4]
Code language: R (r)

Display the first three rows and the last row

AMZN.sma[c(1:3,nrow(AMZN.sma)),]
Code language: R (r)
           AMZN.Close
2018-01-02    59.4505
2018-01-03    60.2100
2018-01-04    60.4795
2020-09-04   164.7310

Moving average over 50 and 200 days

# Trailing (right-aligned) simple moving averages of the closing price
AMZN.sma$sma50 <- rollmeanr(AMZN.sma$AMZN.Close, k = 50)
AMZN.sma$sma200 <- rollmeanr(AMZN.sma$AMZN.Close, k = 200)

# Show the first three rows and the final row; the early rows are NA because
# a full 50- or 200-day window is not yet available
AMZN.sma[c(seq_len(3), nrow(AMZN.sma)), ]
           AMZN.Close    sma50   sma200
2018-01-02    59.4505       NA       NA
2018-01-03    60.2100       NA       NA
2018-01-04    60.4795       NA       NA
2020-09-04   164.7310 157.2444 117.8078
data.AMZN$AMZN.Close
Code language: R (r)
           AMZN.Close
2018-01-02    59.4505
2018-01-03    60.2100
2018-01-04    60.4795
2018-01-05    61.4570
2018-01-08    62.3435
2018-01-09    62.6350
2018-01-10    62.7165
2018-01-11    63.8340
2018-01-12    65.2600
2018-01-16    65.2430
       ...           
2020-08-24   165.3730
2020-08-25   167.3245
2020-08-26   172.0925
2020-08-27   170.0000
2020-08-28   170.0900
2020-08-31   172.5480
2020-09-01   174.9560
2020-09-02   176.5725
2020-09-03   168.4000
2020-09-04   164.7310
plot(data.AMZN$AMZN.Close)
Code language: R (r)
plot(AMZN.sma$sma50)
Code language: R (r)
plot(AMZN.sma$sma200)
Code language: R (r)

Obtaining Rolling standard deviations

# Start from the closing-price column (column 4 of the downloaded object)
AMZN.sd <- data.AMZN[, 4]

# 20-day trailing mean of the close (rollmeanr is the right-aligned variant)
AMZN.sd$avg <- rollmeanr(AMZN.sd$AMZN.Close, k = 20)

# 20-day trailing standard deviation. The alignment is spelled out so that the
# window is guaranteed to match the one used for `avg`, instead of depending on
# the default of whichever rollapply() method gets dispatched. fill = NA pads
# the leading rows for which no complete window exists.
AMZN.sd$sd <- rollapply(
  AMZN.sd$AMZN.Close,
  width = 20,
  FUN = sd,
  fill = NA,
  align = "right"
)

# Print the series: close, rolling mean, rolling standard deviation
AMZN.sd
           AMZN.Close      avg       sd
2018-01-02    59.4505       NA       NA
2018-01-03    60.2100       NA       NA
2018-01-04    60.4795       NA       NA
2018-01-05    61.4570       NA       NA
2018-01-08    62.3435       NA       NA
2018-01-09    62.6350       NA       NA
2018-01-10    62.7165       NA       NA
2018-01-11    63.8340       NA       NA
2018-01-12    65.2600       NA       NA
2018-01-16    65.2430       NA       NA
       ...                             
2020-08-24   165.3730 158.6092 4.529263
2020-08-25   167.3245 159.4746 4.453943
2020-08-26   172.0925 160.4954 4.890786
2020-08-27   170.0000 161.3657 4.958957
2020-08-28   170.0900 161.9585 5.264138
2020-08-31   172.5480 162.8061 5.543032
2020-09-01   174.9560 163.7069 5.985849
2020-09-02   176.5725 164.5229 6.573644
2020-09-03   168.4000 164.8804 6.580700
2020-09-04   164.7310 165.1983 6.400910

Fitting a curve to price data

# Download the Dow Jones Industrial Average from FRED; with the default
# auto-assignment the series is stored in a variable named DJIA
getSymbols("DJIA", src = "FRED")

# Restrict the series to 2015 through 2019
serie <- DJIA["2015/2019"]
price <- as.numeric(serie)
time <- index(serie)

# Regress log(price) on an observation counter, i.e. fit the exponential
# trend price ~ exp(a + b * x). seq_along() is used instead of 1:length(),
# which would yield c(1, 0) for an empty series.
x <- seq_along(price)
model <- lm(log(price) ~ x)

# coef() is the accessor for the fitted coefficients; model$coef only works
# through partial matching of the `coefficients` component name
expo <- exp(coef(model)[1] + coef(model)[2] * x)

# Price path with the fitted exponential trend overlaid (colour 2, width 2)
plot(x = time, y = price, main = "Dow Jones", type = "l")
lines(time, expo, col = 2, lwd = 2)
lm(formula = log(price) ~ x)
Code language: R (r)

Call:
lm(formula = log(price) ~ x)

Coefficients:
(Intercept) x
9.6876626 0.0004355

Percent and Logarithmic Returns

Daily price returns

# Download IBM daily prices for 2010-12-31 to 2013-12-31 and keep the
# returned object (auto.assign = FALSE)
data.IBM <- getSymbols(
  "IBM",
  from = "2010-12-31",
  to = "2013-12-31",
  auto.assign = FALSE
)

# Keep only the closing price, which is the fourth column
IBM.price <- data.IBM[, 4]

# Inspect the structure of the downloaded object
str(data.IBM)
An xts object on 2010-12-31 / 2013-12-30 containing:
Data: double [754, 6]
Columns: IBM.Open, IBM.High, IBM.Low, IBM.Close, IBM.Volume … with 1 more column
Index: Date [754] (TZ: “UTC”)
xts Attributes:
$ src : chr “yahoo”
$ updated: POSIXct[1:1], format: “2023-10-13 16:27:12”
IBM.price[c(1:3,nrow(IBM.price)),]
Code language: R (r)
           IBM.Close
2010-12-31  140.3059
2011-01-03  140.9943
2011-01-04  141.1472
2013-12-30  178.2122

The function ‘Delt’ computes the percentage change from one observation to the next in a data set.

IBM.price$IBM.ret<-Delt(IBM.price$IBM.Close)
IBM.price[c(1:3,nrow(IBM.price)),]
Code language: R (r)
           IBM.Close     IBM.ret
2010-12-31  140.3059          NA
2011-01-03  140.9943 0.004905988
2011-01-04  141.1472 0.001084933
2013-12-30  178.2122 0.007186107
# Logarithmic return: first difference of the log closing price
IBM.price$IBM.log.ret <- diff(log(IBM.price$IBM.Close))

# First three rows and the last row, including the new log-return column
IBM.price[c(seq_len(3), nrow(IBM.price)), ]
           IBM.Close     IBM.ret IBM.log.ret
2010-12-31  140.3059          NA          NA
2011-01-03  140.9943 0.004905988 0.004893993
2011-01-04  141.1472 0.001084933 0.001084345
2013-12-30  178.2122 0.007186107 0.007160410
plot(IBM.price$IBM.ret)
Code language: R (r)
plot(IBM.price$IBM.log.ret)
Code language: R (r)

More Examples

Getting basic statistics and displaying data

library("quantmod")

# Retrieve historical price data for General Electric Co. from Yahoo Finance;
# the series is auto-assigned to a variable named GE
getSymbols("GE", src = "yahoo", from = "2000-01-01", to = "2009-12-30")

# (The column headers of the downloaded object can be listed with colnames(GE).)

# Extract the adjusted close for 2000-01-01 through 2000-01-20
geAdj <- GE$GE.Adjusted["2000-01-01/2000-01-20"]

# Largest, smallest and average adjusted close over that window
max(geAdj)
min(geAdj)
mean(geAdj)
Code language: R (r)
168.641677856445 157.673141479492 163.422424316406
Code language: CSS (css)
## draw a chart
chartSeries(GE)
Code language: R (r)

Getting 4 data sets simultaneously

# Tickers to download in a single call; each one is auto-assigned to a
# variable carrying its own name (GE, KO, AAPL, MCD)
symbols <- c("GE", "KO", "AAPL", "MCD")
getSymbols(symbols, src = "yahoo", from = "2012-02-01", to = "2013-02-01")

# Adjusted closing price of each instrument
GEad <- GE$GE.Adjusted
KOad <- KO$KO.Adjusted
AAPLad <- AAPL$AAPL.Adjusted
MCDad <- MCD$MCD.Adjusted

# Cumulative sum (cumsum) of the daily percent returns (Delt * 100).
# The first observation is dropped with [-1, ] because its return is NA,
# so the cumulative sum is not defined for it.
ge <- cumsum((Delt(GEad) * 100)[-1, ])
ko <- cumsum((Delt(KOad) * 100)[-1, ])
ap <- cumsum((Delt(AAPLad) * 100)[-1, ])
md <- cumsum((Delt(MCDad) * 100)[-1, ])

# Common y-axis range covering all four series
lim <- c(min(ge, ko, ap, md), max(ge, ko, ap, md))

# Draw GE first, then overlay the other three series and add a legend
plot(ge, main = "", ylim = lim, xlab = "dates", ylab = "% benefits")
lines(ko, col = "red")
lines(ap, col = "violet")
lines(md, col = "yellow")
legend(
  x = "topleft",
  cex = 0.4,
  legend = c("GE", "KO", "AAPL", "MCD"),
  lty = 1,
  col = c("black", "red", "violet", "yellow")
)
Code language: R (r)

Leave a Reply

Your email address will not be published. Required fields are marked *