answer: exercise type
# Levels of the exercise factor, stored as a character vector.
# The labels contain spaces, so each one must be a quoted string; left
# unquoted they are a syntax error.
factor <- c("light exercise", "moderate exercise", "heavy exercise")
# Load the finance data.
data <- read.csv("FinanceCanada.csv")
# Two-sample Welch t-test of H0: mean DPS is equal in the two industries.
# The earlier call passed data$Industry as the second *sample*, which compares
# DPS values against the industry codes themselves. Industry appears to be the
# grouping variable, so it belongs on the right-hand side of the formula.
# `paired` is omitted: it defaults to FALSE and the formula interface of
# recent R versions rejects it.
# NOTE(review): assumes Industry is coded numerically and that codes 1 and 2
# are the two industries being compared -- confirm against the data.
t.test(DPS ~ Industry, data = subset(data, Industry %in% c(1, 2)),
       alternative = "two.sided", mu = 0,
       var.equal = FALSE, conf.level = 0.95)
H_0: mu_1 = mu_2
H_1: mu_1 != mu_2
# Load the finance data (comma-separated, first row holds column names).
finance <- read.csv(file = "FinanceCanada.csv", header = TRUE, sep = ",")
# Pooled-variance two-sample t-test of DPS between the two IG groups.
# The columns are resolved through `data = finance`, so attach() is not needed
# (it only puts a stale copy of the columns on the search path). TRUE is
# spelled out because `T` is an ordinary variable that can be reassigned.
t.test(DPS ~ IG, data = finance, var.equal = TRUE)
# NOTE(review): hard-coded, machine-specific working directory; kept so the
# relative path below still resolves, but a project-relative path is preferable.
setwd('E:/Grass')
Grass <- read.csv('Grass.csv', header = TRUE)
head(Grass)
# Both treatment columns are categorical.
Grass$Species <- as.factor(Grass$Species)
Grass$Fertilizer <- as.factor(Grass$Fertilizer)
summary(Grass)
# Matrix of the integer level codes of the two treatments (kept for any later
# use; it is no longer used for plotting).
Treatments <- cbind(Grass$Species, Grass$Fertilizer)
colnames(Treatments) <- c('Species', 'Fertilizer')
# One box per Species x Fertilizer combination. The previous formula used the
# two-column `Treatments` matrix as the grouping term, which does not define
# one group per row; listing both factors in the formula does. attach() was
# dropped because every column is resolved through `data = Grass`.
boxplot(Grass ~ Species + Fertilizer, data = Grass,
        xlab = 'Species.Fertilizer', ylab = 'Grass')
library(readr)
library(car)
library(dplyr)
# Read the finance data with readr and store the industry column as a factor.
finance <- read_csv(file = "FinanceCanada.csv")
finance$TypeOfIndustry <- as.factor(x = finance$TypeOfIndustry)
#' Collapse industry codes into three groups
#'
#' Codes 1, 2, 3 become 1; codes 4 and 6 become 2; codes 5 and 7 become 3.
#' Any other value (including NA) is returned unchanged.
#'
#' Vectorized: works on a single value, as when called through sapply(), and
#' on a whole vector or factor in one call. The previous `if`/`else if` chain
#' only accepted a length-one input.
#'
#' @param x A factor or numeric vector; it is converted with as.integer(), so
#'   a factor contributes its integer level codes.
#' @return A numeric vector the same length as `x`.
collapse_factor <- function(x) {
  codes <- as.integer(x)
  collapsed <- codes
  # Every mask is computed from the original codes so that an already
  # collapsed value is never remapped by a later rule.
  collapsed[codes %in% c(1, 2, 3)] <- 1
  collapsed[codes %in% c(4, 6)] <- 2
  collapsed[codes %in% c(5, 7)] <- 3
  collapsed
}
# Recode each industry value with collapse_factor() and store the result as a
# factor again.
finance$TypeOfIndustry <- as.factor(
  sapply(finance$TypeOfIndustry, collapse_factor)
)
# Standalone copies of the two analysis columns.
TypeOfIndustry <- finance$TypeOfIndustry
DPS <- finance$DPS
lm
##
## Call:
## lm(formula = grass ~ fertilizer * species, data = grass)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.979 -3.917 0.702 3.302 14.873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.0584 7.8839 -2.424 0.0206
## fertilizer2 12.7062 8.6050 1.475 0.1554
## fertilizer3 12.5899 8.5996 1.461 0.1602
## species2 14.0992 8.5995 1.639 0.1193
## fertilizer2:species2 5.8918 9.9617 0.591 0.5630
## fertilizer3:species2 5.7755 9.9661 0.580 0.5697
# Is the mean Dividend / SharesOutstanding ratio of industry 1 larger than
# that of industry 2?
(
  mean(
    df[df$Industry == 1, ]$Dividend / df[df$Industry == 1, ]$SharesOutstanding
  ) -
    mean(
      df[df$Industry == 2, ]$Dividend / df[df$Industry == 2, ]$SharesOutstanding
    )
) > 0
# Load the grass data and attach readable labels to both treatment factors.
grasses <- read.csv(file = "Grass.csv")
grasses$Fertilizer <- factor(
  grasses$Fertilizer,
  labels = c("1", "2", "3")
)
grasses$Species <- factor(
  grasses$Species,
  labels = c("A", "B")
)
# Two-way ANOVA with main effects and the Species:Fertilizer interaction.
aovmod <- aov(Grass ~ Species * Fertilizer, data = grasses)
summary(aovmod)
# Exercise intensity as a factor. The levels are given explicitly so they
# follow the natural order light, moderate, heavy; by default factor() sorts
# levels alphabetically (heavy, light, moderate).
factor(c("light", "moderate", "heavy"),
       levels = c("light", "moderate", "heavy"))
# Twenty observed salaries.
salaries <- c(80000, 82000, 85000, 88000, 90000, 90000, 91000, 92000, 94000, 94000,
95000, 95000, 97000, 97000, 97000, 98000, 98000, 98000, 100000, 100000)
# One-sample, two-sided t-test of H0: mean salary = 90000.
t.test(salaries, mu = 90000)
# Result for these 20 values (rounded; re-run for the exact printout):
# One Sample t-test
# data: salaries
# t is about 2.363, df = 19, p-value is about 0.03 (below 0.05)
# alternative hypothesis: true mean is not equal to 90000
# 95 percent confidence interval:
# roughly 90349 to 95751
# sample estimates:
# mean of x
# 93050
# NOTE(review): the output previously pasted here (t = 1.9146,
# p-value = 0.06893, mean 93774.74) does not correspond to this vector, whose
# mean is exactly 93050; it appears to come from a different data set.
H0: There is no relationship between species and nitrogen level
H1: There is a relationship between species and nitrogen level
H0: There is no relationship between fertilizer and nitrogen level
H1: There is a relationship between fertilizer and nitrogen level
# Load the grass data and label both treatment factors.
grass <- read.csv("grass.csv")
grass$Species <- factor(grass$Species, levels = 1:2, labels = c("A", "B"))
grass$Fertilizer <- factor(grass$Fertilizer, levels = 1:3,
                           labels = c("A", "B", "C"))
# Height by each treatment separately.
boxplot(Height ~ Fertilizer, data = grass,
        xlab = "Fertilizer", ylab = "Height", main = "grass height")
boxplot(Height ~ Species, data = grass,
        xlab = "Species", ylab = "Height", main = "grass height")
# Models use bare column names with `data = grass` instead of `grass$...`
# inside the formula, so the terms are labelled "Species"/"Fertilizer" and the
# fitted objects can be reused with new data.
# Additive two-way ANOVA table.
anova(lm(Height ~ Species + Fertilizer, data = grass))
# Full model including the Species:Fertilizer interaction.
summary(lm(Height ~ Species * Fertilizer, data = grass))
# Load and inspect the grass data.
grass <- read.csv("Grass.csv")
head(grass)
names(grass)
# Show the first ten observations. head() stops at the last row, whereas the
# previous `for (i in 1:10)` loop indexed past the end of a shorter data frame
# and printed all-NA rows.
print(head(grass, 10))
str(grass)
summary(grass)
# Convert to factor
grass$Species <- factor(grass$Species)
grass$Fertilizer <- factor(grass$Fertilizer)
# Run the additive two-way ANOVA
anova_1 <- aov(Yield ~ Species + Fertilizer, data = grass)
# Summarize the results
summary(anova_1)
# Tukey pairwise comparisons for the additive model
TukeyHSD(anova_1)
# Refit including the Species:Fertilizer interaction
anova_2 <- aov(Yield ~ Species * Fertilizer, data = grass)
summary(anova_2)
# Load the grass experiment data.
grass <- read.csv(file = "Grass.csv")
# Levels of the exercise factor, kept as a plain character vector.
factor1 <- c(
  "light",
  "moderate",
  "heavy"
)
# Load the grass data set and preview the first rows.
data <- read.csv(file = "Grass.csv")
head(data)
# nrow() reports how many rows (observations) the data frame has.
nrow(data)
# ncol() reports how many columns (variables) the data frame has.
ncol(data)
# Recode both treatment columns as factors.
data$Species <- factor(data$Species)
data$Fertilizer <- factor(data$Fertilizer)
H0: there is no difference in the dividend per share ratio between industries
H1: there is a difference in the dividend per share ratio between industries
H0: u(Real Estate) = u(Financial) = u(Health) = u(Mining) = u(Manufacturing) = u(Utilities) = u(IT)
Ha: at least one of the means is different
Null hypothesis: all cards bring in the same number of applications
Alternative hypothesis: there are differences among the number of applications for various cards
# Load the grass data and print it in full.
grass <- read.csv(file = "Grass.csv")
grass
# Give both treatment factors descriptive labels.
grass$Species <- factor(
  grass$Species,
  labels = c("Species_1", "Species_2")
)
grass$Fertilizer <- factor(
  grass$Fertilizer,
  labels = c("Fertilizer_A", "Fertilizer_B", "Fertilizer_C")
)
head(grass)
library(ggplot2)
# Boxplots of Blades per species, coloured by fertilizer.
ggplot(grass, aes(x = Species, y = Blades, color = Fertilizer)) +
  geom_boxplot()
# Additive two-way ANOVA followed by Tukey pairwise comparisons.
grass.aov <- aov(Blades ~ Species + Fertilizer, data = grass)
summary(grass.aov)
TukeyHSD(grass.aov)
# Load the grass data.
grass <- read.csv("grass.csv")
head(grass)
# Recode both treatments as labelled factors. attach() was removed: it ran
# before this recoding, so the attached copies were the raw, unconverted
# columns, and every call below resolves columns through `data = grass`.
grass$grass <- factor(grass$grass, levels = c(1, 2),
                      labels = c("species1", "species2"))
grass$fertilizer <- factor(grass$fertilizer, levels = c(1, 2, 3),
                           labels = c("fertilizer1", "fertilizer2", "fertilizer3"))
# Blades by species, then by every species x fertilizer combination.
# Inside the formulas `grass` is the species column of the data frame.
boxplot(blades ~ grass, data = grass)
boxplot(blades ~ grass + fertilizer, data = grass)
# Additive two-way ANOVA; summary() prints the F tests and p-values, which
# printing the bare aov object does not show.
summary(aov(blades ~ grass + fertilizer, data = grass))
"light, moderate and heavy disire"
> df <- read.csv("FinanceCanada.csv")
> df
sector marketCap cash debt profit revenue price dividend
1 Real Estate 3.391880 2.817079 51 2.92786 5.609708 1.591719 0.000000e+00
2 Financials 15.914390 0.948429 48 3.36506 6.037782 7.684375 5.824243e-03
3 Industrials 13.367450 1.573441 39 2.84042 5.111682 6.977952 2.966864e-03
4 Industrials 12.686620 1.626580 23 1.97762 3.917748 5.824494 4.395904e-03
5 Industrials 12.288600 1.396398 40 3.21616 5.455960 5.846487 5.084980e-03
6 Industrials 11
# Load the finance data.
finance <- read.csv("FinanceCanada.csv")
library(car)
# One-way ANOVA of DPS across industry types. The columns are resolved through
# `data = finance`, so the attach()/detach() pair was removed; it added
# nothing and left `finance` attached whenever aov() failed.
mod <- aov(DPS ~ as.factor(TypeOfIndustry), data = finance)
summary(mod)
# Levels of the exercise factor as a character vector.
factor <- c(
  "Light exercise",
  "Moderate exercise",
  "Heavy exercise"
)
# Load the finance data and preview the first rows.
finance <- read.csv(file = "FinanceCanada.csv")
head(x = finance)
### HYPOTHESIS 1: a)
H0: The mean dividend per share of real estate industry is equal to the mean dividend per share of
financial industry
Ha: The mean dividend per share of real estate industry is not equal to the mean dividend per share of
financial industry
# Load the grass data (first row holds the column names) and print it.
grass <- read.csv(file = "grass.csv", header = TRUE)
grass
# Load the finance data and split it into one data frame per industry code.
# which() drops rows whose Industry is NA, matching subset().
industry <- read.csv(file = "FinanceCanada.csv")
industry1 <- industry[which(industry$Industry == 1), , drop = FALSE]
industry2 <- industry[which(industry$Industry == 2), , drop = FALSE]
industry3 <- industry[which(industry$Industry == 3), , drop = FALSE]
industry4 <- industry[which(industry$Industry == 4), , drop = FALSE]
industry5 <- industry[which(industry$Industry == 5), , drop = FALSE]
industry6 <- industry[which(industry$Industry == 6), , drop = FALSE]
industry7 <- industry[which(industry$Industry == 7), , drop = FALSE]
# Null hypothesis: The average dividend per share ratio of industry 1 is equal to the population's.
# Alternative hypothesis: The average dividend per share ratio of industry 1 is different than the population's.
# The alternative hypothesis is the same for all industries
# The null hypothesis is that the mean is equal to 0
# The alternative hypothesis is that the mean is not equal to 0
# The alternative hypothesis is that the mean is greater than 0
# The alternative hypothesis is that the mean is less than 0
# The alternative hypothesis is that the mean is not equal to the population's mean
# Null hypothesis: What is
> H0: There is no difference in the dividend per share ratio between the seven industries
> H1: There is a difference in the dividend per share ratio between the seven industries
# Load the grass data and treat both treatments as factors.
grassData <- read.csv(file = "Grass.csv")
grassData$Fertilizer <- factor(grassData$Fertilizer)
grassData$Species <- factor(grassData$Species)
# Blades by species, by fertilizer, and by every species/fertilizer pair.
boxplot(grassData$Blades ~ grassData$Species)
boxplot(grassData$Blades ~ grassData$Fertilizer)
boxplot(grassData$Blades ~ grassData$Species + grassData$Fertilizer)
#H0: no difference
#H1: difference
H0: mu(realestate) = mu(financial)
Ha: mu(realestate) != mu(financial)
# Strongly favour fixed over scientific notation when printing numbers.
options(scipen = 999)
data <- read.csv(file = "FinanceCanada.csv")
# One-way ANOVA of the dividend-per-share ratio across industry types.
AOV <- aov(DividendPerShareRatio ~ as.factor(TypeOfIndustry), data = data)
summary(AOV)
# Load the grass data, print it in full, and show its structure.
Grass <- read.csv(file = "Grass.csv", header = TRUE)
Grass
str(Grass)
# Store both treatment columns as factors.
Grass$Species <- as.factor(Grass$Species)
Grass$Fertilizer <- as.factor(Grass$Fertilizer)
# Counts per level of Species.
summary(Grass$Species)
# Counts per level of Fertilizer.
summary(Grass$Fertilizer)
# Preview the first rows of the converted data.
head(Grass)
> # Null Hypothesis:
"There is no significant difference in the mean Dividend per share ratio between industries"
> # Alternative Hypothesis:
"There is significant difference in the mean Dividend per share ratio between industries"
#H0: μdps1 = μdps2 = μdps3 = μdps4 = μdps5 = μdps6 = μdps7
#Ha: Not all groups have the same mean
# R
# Ninety exercise labels (the three intensities repeated 30 times) as a
# factor. Levels are listed explicitly to keep the natural order light,
# moderate, heavy instead of factor()'s default alphabetical order.
factor(rep(c("light", "moderate", "heavy"), 30),
       levels = c("light", "moderate", "heavy"))
#' Additive two-way ANOVA table of Blades on Species and Fertilizer
#'
#' @param df A data frame with columns `Blades`, `Species` and `Fertilizer`.
#'   `Species` and `Fertilizer` are converted to factors before fitting so
#'   that numeric treatment codes are not fitted as continuous predictors.
#' @return The `summary.aov` object for `Blades ~ Species + Fertilizer`.
anova_table <- function(df) {
  stopifnot(
    is.data.frame(df),
    all(c("Blades", "Species", "Fertilizer") %in% names(df))
  )
  df$Species <- factor(df$Species)
  df$Fertilizer <- factor(df$Fertilizer)
  summary(aov(Blades ~ Species + Fertilizer, data = df))
}

# Print the table when a data frame called `df` has been loaded. Without one,
# `df` resolves to the stats::df density function, so the call is skipped.
if (is.data.frame(df)) {
  print(anova_table(df))
}
# Load the grass data and preview the first rows.
data <- read.csv(file = "Grass.csv")
head(x = data)
> h0 <- "Real estate, financial and pharmaceuticals/drugs companies have a higher dividend per share ratio"
> h1 <- "Real estate, financial and pharmaceuticals/drugs companies do not have a higher dividend per share ratio"
# Factor is exercise
#Using the Salary data set, test whether there is a difference in salaries between males and females.
#H0: μ1 - μ2 = 0
#H1: μ1 - μ2 != 0
# # t.test(salary ~ sex)
# # Welch Two Sample t-test
# #
# # data: salary by sex
# # t = -0.9397, df = 251.4, p-value = 0.3489
# # alternative hypothesis: true difference in means is not equal to 0
# # 95 percent confidence interval:
# # -2400.9750 1107.6960
# # sample estimates:
# # mean in group F mean in group M
# # 46992.869 48625.963
# Load the grass data.
data <- read.csv(file = "Grass.csv")
# Drop rows whose Species code is 0, then rows whose Fertilizer code is 0
# (two separate passes, in this order).
data <- data[data$Species != 0, ]
data <- data[data$Fertilizer != 0, ]
# Treat both treatments as factors.
data$Fertilizer <- as.factor(data$Fertilizer)
data$Species <- as.factor(data$Species)
# Two-way ANOVA including the Species:Fertilizer interaction.
mod <- aov(Blades ~ Species * Fertilizer, data = data)
summary(mod)
#H0: mu1 = mu2 = ... = mu13
#H1: at least one mu different
# ANSWER:
1. exercise intensity
2. time of day
# Load the data and give its columns short names
# (assigned by position: grass, fertilizer, blades).
grass <- read.csv(file = "Grass.csv")
names(grass) <- c("grass", "fertilizer", "blades")
head(grass)
# Factor versions of the two treatment columns
grass$grass_lev <- factor(grass$grass, labels = c("1", "2"))
grass$fertilizer_lev <- factor(grass$fertilizer, labels = c("1", "2", "3"))
# One combined factor with a level per species/fertilizer pair
grass$grass_fer_int <- interaction(grass$grass_lev, grass$fertilizer_lev)
# Preview the data with the new columns
head(grass)
# Side-by-side boxplots, grouped by the combined factor
boxplot(
  blades ~ grass_fer_int,
  data = grass,
  xlab = "Grass species and Fertilizer",
  ylab = "Number of blades"
)
# The same grouping, written as two separate terms in the formula
boxplot(
  blades ~ grass_lev + fertilizer_lev,
  data = grass,
  xlab = "Grass species and Fertilizer",
  ylab = "Number of blades"
)
# Convert the exercise column of sleep_data to a factor and print it.
# NOTE(review): sleep_data is not created in this snippet -- it must already
# exist in the session.
factor(sleep_data$exercise)
h0: mu1 = mu2
h1: mu1 != mu2
# H0: μ= $90 000
# H1: μ≠ $90 000
# test: t-test
# Null hypothesis: mean Income is the same in both Gender groups.
# Alternative hypothesis: mean Income differs between the Gender groups.
# Two-sample t-test of Income by Gender (unequal variances by default).
t.test(formula = Income ~ Gender, data = Salary)
# Load the finance data and treat the industry type as a factor.
# attach() was removed: it ran before the factor conversion, so the attached
# copy of TypeOfIndustry was the raw column, and aov() below already resolves
# its variables through `data = FinanceCanada`.
FinanceCanada <- read.csv(file = "FinanceCanada.csv", header = TRUE)
FinanceCanada$TypeOfIndustry <- as.factor(FinanceCanada$TypeOfIndustry)
library(car)
# One-way ANOVA of DPS across industry types.
DPS.aov <- aov(DPS ~ TypeOfIndustry, data = FinanceCanada)
summary(DPS.aov)
# Diagnostic plots for the fitted model.
plot(DPS.aov)
# Ten observed values, and a constant comparison vector of ten 200s.
x <- c(
  230, 270, 270, 270, 270,
  290, 290, 320, 320, 380
)
y <- rep(200, 10)
H0: mu1 = mu2 = ... = mu13
Ha: at least one mu is different from the others
> H0: u1 = u2 =...= u7
> Ha: at least one of the means is different
# Exercise intensity as a factor. Levels are listed explicitly so they keep
# the natural order Light, Moderate, Heavy; the default is alphabetical
# (Heavy, Light, Moderate).
factor(c('Light', 'Moderate', 'Heavy'),
       levels = c('Light', 'Moderate', 'Heavy'))
# Load the finance data.
FinanceCanada <- read.csv(file = "FinanceCanada.csv", header = TRUE, sep = ",")
summary(FinanceCanada)
# Treat the industry type as a factor (summary() then shows level counts).
FinanceCanada$TypeOfIndustry <- as.factor(FinanceCanada$TypeOfIndustry)
summary(FinanceCanada)
options(repr.plot.width = 4, repr.plot.height = 4)
plot(FinanceCanada$TypeOfIndustry, FinanceCanada$DPS,
     ylab = "DPS", xlab = "TypeOfIndustry")
# fit anova
# The model takes its variables from `data = FinanceCanada`. Previously the
# data frame was attach()ed before the factor conversion, so the bare
# `TypeOfIndustry` seen by lm() was the original unconverted column rather
# than the factor created above.
fit <- lm(DPS ~ TypeOfIndustry, data = FinanceCanada)
anova(fit)
h0: there is no difference in the dividend per share ratio across industries
h1: there is a difference in the dividend per share ratio across industries
# Three-level factor: codes 1, 2, 3 shown with the labels A, B, C.
factor(
  1:3,
  levels = 1:3,
  labels = c("A", "B", "C")
)
# type of exercise
# a) State the null and alternative hypotheses
Ho: Dividend per share ratio is the same for all types of industries
H1: Dividend per share ratio is not the same for all types of industries
H0: There is no difference in the mean dividend per share ratio of companies in each of the seven industry groups.
Ha: There is a difference in the mean dividend per share ratio of companies in each of the seven industry groups.
H0: µ1 = µ2
Ha: µ1 != µ2
# NOTE(review): absolute, machine-specific path; kept as-is.
mydata <- read.csv('C:/Users/Admin/Downloads/mydata.csv')
# Keep the first nine rows.
data <- mydata[1:9, ]
# aov() needs a model formula; handing it the data frame alone fails when it
# asks for the model terms. formula() on a data frame builds
# "<first column> ~ <all other columns>".
# NOTE(review): presumably the first column is the response -- confirm, or
# write the formula out explicitly.
f <- aov(formula(data), data = data)
summary(f)