Validate and Explain Probability Distributions: Assess the validity of a probability distribution using the concepts of outcome, sample space, and probability properties (e.g., disjoint outcomes, probabilities between 0 and 1, and total probabilities summing to 1).
Compute Probabilities Using Various Tools: Use logic, Venn diagrams, and probability rules to compute probabilities for events.
Understand and Compute Expectations and Variances: Explain the concepts of expectations and variances of random variables, and compute the expectation and variance of a linear combination of random variables.
Conduct Hypothesis Testing Using Simulation: Set up null and alternative hypotheses to test for independence between variables, and use simulation techniques to evaluate data support for these hypotheses.
sample() will randomly sample size many
sample from the vector x
sample(x, size, replace = FALSE, prob = NULL)
# example
x <- c("apple", "pear", "strawberry", "orange", "lemon")
sample(x, 3) # picks 3 unique fruits from the vector (without replacement)
## [1] "orange" "strawberry" "pear"
sample(x, 10, replace=TRUE) # picks 10 fruits, allowing repeats (sampling with replacement)
## [1] "apple" "strawberry" "lemon" "pear" "apple"
## [6] "strawberry" "strawberry" "apple" "pear" "orange"
#
#
#
Loops repeatedly execute a block of code for various elements.
# Example: Print numbers from 1 to 5
print("Example1 ")
## [1] "Example1 "
for (i in 1:5) {
print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# Example 2: Print numbers from 1 to 5
print("Example2 ")
## [1] "Example2 "
for (i in c(1,2,3,4,5)) {
print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# Example3: Simulate rolling a die 5 times and record it
print("Example3 ")
## [1] "Example3 "
rolls <- rep(0, 5) # Initialize a vector of length 5 with zeros to store roll results
for (i in 1:5) {
rolls[i] <- sample(1:6, 1) # Sample one number between 1 and 6 and assign it to the i-th position
}
rolls
## [1] 1 1 4 5 1
# For loop syntax
for (variable in sequence) {
# code you want to execute repeatedly until the end of the sequence
}
#
#
#
x <- c(0,1)
sample(x,1)
## [1] 0
rolls <- rep(0,10)
for (i in 1:10){
rolls[i] <- sample(x,1)
}
print(rolls)
## [1] 0 1 1 0 1 0 0 0 1 1
Functions allow you to encapsulate reusable blocks of code and parameters.
# Example: Function to calculate (a+b)^2
calc <- function(a,b){
output <- a+b
return(output)
}
calc <- function(#argument for input){
return(output) #output can be a value or a parameter
# or you don't need an output sometimes
}
# input or output not necessary
greet <- function(){
print("Hello there")
}
greet()
## [1] "Hello there"
greet()
## [1] "Hello there"
greet()
## [1] "Hello there"
# what is your function name?
# what should be inputs?
# what is the output?
Combine loops and functions for repeated tasks.
# Example: Roll a die 5 times using a loop within a function
roll_multiple <- function(runs) {
rolls <- rep(0, runs) # create rolls vector of 0
for (i in 1:runs) {
rolls[i] <- sample(1:6, 1)
}
return(rolls)
}
roll_multiple(5)
## [1] 3 4 3 1 3
#Outcome from those two dice
die1 <- sample(1:6, 1)
die2 <- sample(1:6, 1)
sum_of_two <- die1 + die2
sum_of_two
## [1] 5
for loop, assign outcome of two dice rolled into
myoutcome from 100 iterations.myoutcome = rep(0, 100) # make a vector of size 100 that has all zero's
for (i in 1:100){
dice1 <- sample(1:6, 1)
dice2 <- sample(1:6, 1)
sum_of_two <- dice1 + dice2
myoutcome[i] <- sum_of_two
}
head(myoutcome) # print first 6 entries
## [1] 7 10 3 4 3 6
replicate(), it carries out repeated tasks in a
more computationally efficient way.twodice_outcome <- function(){
dice1 <- sample(1:6, 1)
dice2 <- sample(1:6, 1)
sum_of_two <- dice1 + dice2
myoutcome <- sum_of_two
return(myoutcome)
}
twodice_outcome() # this function works as one simulation run
## [1] 8
myoutcome <- replicate(n=30, twodice_outcome()) # 10 runs
myoutcome
## [1] 2 10 5 2 6 9 7 11 5 9 10 5 4 7 8 10 9 8 9 8 5 6 7 9 9
## [26] 7 8 3 2 5
# for loop version (clasic)
n = 30 # assign 30 for rolling times
myoutcome <- rep(0, n) #initialize empty vector or size n
for (i in 1:n){
myoutcome[i] <- twodice_outcome() # store each outcome to ith entry in myoutcome vector
}
myoutcome # print out myoutcome
## [1] 5 10 9 7 9 9 6 7 3 10 3 7 7 11 7 8 10 9 10 5 6 7 12 8 5
## [26] 10 12 3 4 6
outcome_table <- table(myoutcome)
n <- length(myoutcome) # this length function will count how many unique output you have.
prob_outcome <- outcome_table/n # to calculate each probability, you divide each frequency by the total number of observation n
prob_outcome
## myoutcome
## 3 4 5 6 7 8 9
## 0.10000000 0.03333333 0.10000000 0.10000000 0.20000000 0.06666667 0.13333333
## 10 11 12
## 0.16666667 0.03333333 0.06666667
# replicate version (faster but little challenging)
myoutcome <- replicate(n=30, twodice_outcome())
outcome_table <- table(myoutcome)
n <- length(myoutcome)
prob_outcome <- outcome_table/n
prob_outcome
## myoutcome
## 3 4 5 6 7 8 9
## 0.13333333 0.06666667 0.06666667 0.10000000 0.16666667 0.03333333 0.13333333
## 10 11 12
## 0.16666667 0.10000000 0.03333333
# how to ensure that it is proper probability?
sum(prob_outcome) # it needs to add up to 1
## [1] 1
outcome_table * prob_outcome
## myoutcome
## 3 4 5 6 7 8 9
## 0.53333333 0.13333333 0.13333333 0.30000000 0.83333333 0.03333333 0.53333333
## 10 11 12
## 0.83333333 0.30000000 0.03333333
names(outcome_table) # we need this
## [1] "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
values <- as.numeric(names(outcome_table) )
# or
# values <- 2:12
sum(values * prob_outcome)
## [1] 7.4
mu_hat = sum(values * prob_outcome)
mu_hat
## [1] 7.4
sigma_2_hat <- sum( (values-mu_hat)^2 * prob_outcome )
sigma_2_hat
## [1] 7.44
n and a list of
output that gives you the simulated expected value and variance.
(putting all together)sim_fn <- function(n=1000){
myoutcome = rep(0, n)
for (i in 1:n){
dice1 <- sample(1:6, 1)
dice2 <- sample(1:6, 1)
sum_of_two <- dice1 + dice2
myoutcome[i] <- sum_of_two
}
outcome_table <- table(myoutcome)
prob_outcome <- outcome_table/n
values <- as.numeric(names(outcome_table) ) # it takes out the each column name and make it as numeric type value and assign it to values variable
mu_hat = sum(values * prob_outcome)
sigma_2_hat <- sum( (values-mu_hat)^2 * prob_outcome )
output <- list(mu = mu_hat, sigma2 = sigma_2_hat) # list output
# output <- c(mu_hat, sigma_2_hat) # vector output
return(output)
}
sim_fn(100)
## $mu
## [1] 6.96
##
## $sigma2
## [1] 5.9784
sim_fn(1000)
## $mu
## [1] 6.968
##
## $sigma2
## [1] 5.738976
sim_fn(10000)
## $mu
## [1] 7.0333
##
## $sigma2
## [1] 5.889191
sim_fn(100000)
## $mu
## [1] 7.00663
##
## $sigma2
## [1] 5.854686