# RStudio Flashcards
# One-way ANOVA ----
# Template: `data_set`, `X`, `dependent_variable` and `independent_variable`
# are placeholders to be replaced with the real data frame / column names.

# Descriptive statistics for one column.
mean(data_set$X)
sd(data_set$X)

# One-way ANOVA. An R model formula is written `response ~ predictor`, so the
# dependent (measured) variable goes on the LEFT of `~` and the grouping
# factor on the right. Names inside a formula cannot contain spaces.
fit <- aov(dependent_variable ~ independent_variable, data = data_set)
summary(fit)
# Two-way ANOVA ----
# Load the insect counts. String literals must use plain ASCII quotes;
# typographic quotes are not string delimiters in R.
insects_data <- read.csv("insects_data.csv")

# Mean count for a single species-by-season cell.
mean(insects_data$counts[
  insects_data$species == "Megacrania" & insects_data$season == "Spring"
])

# Two-way ANOVA with interaction. `species * season` expands to
# `species + season + species:season`, i.e. both main effects plus their
# interaction.
fit <- aov(counts ~ species * season, data = insects_data)
summary(fit)
# RCBD ANOVA (randomized complete block design) ----
# Load the oat yield data (plain ASCII quotes are required for strings).
data <- read.csv("oat_variety.csv")

# Mean and variance of yield for one variety.
mean(data$yield[data$variety == "Golden.rain"])
var(data$yield[data$variety == "Golden.rain"])

# Presumably `plot` identifies the block -- confirm against the data file.
# It is converted to a factor so that a numerically coded plot id is fitted
# as a blocking factor rather than as a single-df numeric covariate; the
# conversion is a no-op in effect if the column is already text.
data$plot <- as.factor(data$plot)

# Treatment (variety) plus additive block (plot) effect, no interaction.
fit <- aov(yield ~ variety + plot, data = data)
summary(fit)
# SLR (simple linear regression) ----
library(readxl)
# Read the worksheet with readxl::read_excel (plain ASCII quotes required).
data <- read_excel("calls.xlsx")

# Descriptive statistics for the predictor.
mean(data$Calls)
var(data$Calls)
sd(data$Calls)

# Pearson correlation between predictor and response, then a scatter plot
# (pch = 20 draws small filled dots).
cor(x = data$Calls, y = data$Executions)
plot(
  x = data$Calls,
  y = data$Executions,
  pch = 20,
  xlab = "Incoming phone calls",
  ylab = "Trade executions per day"
)

# Fit Executions on Calls, then inspect coefficients, the ANOVA table and
# confidence intervals for the coefficients (95% by default).
fit <- lm(Executions ~ Calls, data = data)
summary(fit)
anova(fit)
confint(fit)
# MLR (multiple linear regression) ----
# Load the data (plain ASCII quotes required) and open it in the data viewer.
data <- read.csv("multreg.csv")
View(data)

# Correlation matrix of the first five columns. R indexing is 1-based; an
# index of 0 is silently dropped, so `1:5` states the intent explicitly.
cor(data[, 1:5])
# The same kind of matrix, selecting the columns by name.
cor(data[, c("Price", "PlotSize", "FloorArea", "Trees", "Distance")])

# Check column types, recode Pool as a categorical predictor, and re-check.
str(data)
data$Pool <- as.factor(data$Pool)
str(data)

# Main effects plus the PlotSize-by-FloorArea interaction. `a:b` is the
# interaction term alone; the main effects are already listed.
fit <- lm(
  Price ~ PlotSize + FloorArea + Trees + Distance + Pool + PlotSize:FloorArea,
  data = data
)
summary(fit)
confint(fit)
# With no `newdata`, predict() returns the fitted values for the training rows.
predict(fit)
# Model Building & Logistic Regression ----
# Logistic regression: binomial GLM of `cases` on sex and income.
# (Plain ASCII quotes are required for string literals.)
logreg_data <- read.csv("logreg.csv")
fit <- glm(cases ~ sex + income, data = logreg_data, family = "binomial")
summary(fit)

# Model building by stepwise selection.
mb_data <- read.csv("step.csv")
str(mb_data)
# Treat these two columns as categorical predictors.
mb_data$medschl <- as.factor(mb_data$medschl)
mb_data$region <- as.factor(mb_data$region)

# Largest candidate model (every other column as a predictor) and the
# intercept-only starting model.
fit.full <- lm(length ~ ., data = mb_data)
fit.empty <- lm(length ~ 1, data = mb_data)

# Forward selection: start from the empty model and add terms, with the
# full model's formula as the upper bound of the search.
step.model <- step(
  fit.empty,
  scope = formula(fit.full),
  direction = "forward"
)
# Time Series 1 ----
library(forecast)
library(fpp3)
library(ggplot2)
# Monthly wine sales series shipped with the forecast package.
# (Plain ASCII quotes are required for string literals.)
df <- forecast::wineind
autoplot(df) + ggtitle("Australian total wine sales") + ylab("litres")

# Centred moving average computed by hand: the average of two adjacent
# 12-observation means (observations 61-72 and 62-73), which is centred on
# observation 67.
(sum(df[61:72]) / 12 + sum(df[62:73]) / 12) / 2

# Overlay the order-12 moving average on the series.
autoplot(df) + autolayer(ma(df, 12), series = "MA(12)")

# Classical decomposition with multiplicative seasonality.
decomp <- decompose(df, type = "multiplicative")
plot(decomp)

# Seasonal naive benchmark forecast, 24 periods ahead.
simp_mod <- snaive(df, h = 24)
autoplot(df) + autolayer(simp_mod)
# Time Series 2 ----
# Restrict the series to January 1980 through December 1985.
dt <- window(
  df,
  start = c(1980, 1),
  end = c(1985, 12)
)

# Pick a Box-Cox parameter automatically and apply the transformation.
lam <- BoxCox.lambda(dt)
dt_trans1 <- BoxCox(dt, lambda = lam)

# Difference at lag 12 on the transformed series, then plot the result.
dt_trans2 <- diff(dt_trans1, lag = 12)
autoplot(dt_trans2)

# Diagnostics on the differenced series: suggested number of further
# differences, its autocorrelation function, and residual-style checks.
ndiffs(dt_trans2)
Acf(dt_trans2)
checkresiduals(dt_trans2)
# Time Series 3 ----
# NOTE(review): `train` and `test` were used here without being defined
# anywhere earlier in the file. The split below is an assumption -- the last
# 24 observations of the wine series `df` are held out to match the 24-step
# forecast horizon. Confirm against the original exercise.
train <- subset(df, end = length(df) - 24)
test <- subset(df, start = length(df) - 23)

# Holt-Winters forecast with multiplicative seasonality, 24 periods ahead.
# (Plain ASCII quotes are required for string literals.)
hw_mod <- hw(train, seasonal = "multiplicative", h = 24)
autoplot(train) + autolayer(hw_mod)

# Fitted model component of the forecast object.
hw_mod[["model"]]

# Autocorrelation of the training series.
ggAcf(train)

# Accuracy measures, including against the held-out observations.
accuracy(hw_mod, test)
# Time Series 4 ----
# Consumption column of fpp3::us_change as a plain ts object; no frequency
# is supplied, so time is indexed 1, 2, 3, ...
dt <- ts(fpp3::us_change$Consumption)

# Split by time index: up to 170 for training, 171 onwards for testing.
train <- window(dt, end = 170)
test <- window(dt, start = 171)

# ACF and PACF of the training part.
ggAcf(train)
ggPacf(train)

# A hand-specified ARIMA(3,0,3) and an automatically selected model.
# NOTE(review): both are fitted on the full series `dt`, not on `train`,
# and `test` is not used afterwards -- confirm this is intended.
mod1 <- Arima(
  dt,
  order = c(3, 0, 3)
)
mod2 <- auto.arima(dt)

# Residual diagnostics for the automatically selected model.
checkresiduals(mod2)