R Programming Exercises

Author
Affiliation

Jihong Zhang*, Ph.D.

Educational Statistics and Research Methods (ESRM) Program*

University of Arkansas

1 R Programming Exercises

This file contains exercises to practice the concepts covered in the R introduction. Each exercise includes questions in comments and solutions in hidden code chunks.

1.1 Note to Students

  • Try to solve each exercise before looking at the solutions
  • Copy the Questions (the commented lines) into a new R script file, and write your solutions in that file
  • Practice running the code in RStudio
  • Experiment with modifying the code to understand how it works
  • Use the help documentation (?function_name) when you’re unsure about a function

1.2 Exercise 1: Basic Data Types and Vectors

# Question 1: Create a numeric vector with values 10, 20, 30, 40, 50
# Question 2: Create a character vector with your name and two hobbies
# Question 3: Create a logical vector with TRUE, FALSE, TRUE, FALSE
# Question 4: Check the class and length of each vector you created

# Write your code here:
Solution
# Solutions:
# Q1: numeric vector 10, 20, ..., 50 (seq() with a double step yields doubles)
numbers <- seq(from = 10, to = 50, by = 10)

# Q2: character vector -- a name followed by two hobbies
personal_info <- c(
  "Your Name",
  "Reading",
  "Swimming"
)

# Q3: logical vector -- the TRUE/FALSE pair repeated twice
logical_values <- rep(c(TRUE, FALSE), times = 2)

# Q4: inspect the type (class) and number of elements (length) of each vector
class(numbers)
length(numbers)

class(personal_info)
length(personal_info)

class(logical_values)
length(logical_values)

1.3 Exercise 2: Vector Operations

# Question 1: Create two numeric vectors x = (1, 3, 5, 7, 9) and y = (2, 4, 6, 8, 10)
# Question 2: Calculate x + y, x * y, and x^2
# Question 3: Find the mean, sum, and standard deviation of vector x
# Question 4: Create a sequence from 1 to 20 with step size 2

# Write your code here:
Solution
# Solutions:
# Q1: the odd numbers 1..9 and the even numbers 2..10
x <- seq(from = 1, to = 9, by = 2)
y <- seq(from = 2, to = 10, by = 2)

# Q2: arithmetic on vectors is applied element by element
x + y
x * y
x^2

# Q3: summary statistics of x
mean(x)
sum(x)
sd(x)

# Q4: 1, 3, 5, ... -- the sequence stops at the last value not exceeding 20
seq(from = 1, to = 20, by = 2)

1.4 Exercise 3: Factors and Categorical Data

# Question 1: Create a factor from the vector: c("Low", "Medium", "High", "Low", "Medium")
# Question 2: Create an ordered factor with levels: "Beginner", "Intermediate", "Advanced"
# Question 3: Group ages 18, 25, 30, 35, 40, 45, 50 into three categories: "Young" (<30), "Middle" (30-45), "Senior" (>45)
# Question 4: Create a frequency table of your factor from question 1

# Write your code here:
Solution
# Solutions:
# 1. Create factor
# With no `levels` argument, factor() uses the sorted unique values as levels.
levels_factor <- factor(c("Low", "Medium", "High", "Low", "Medium"))

# 2. Create ordered factor
# `levels` fixes the order Beginner < Intermediate < Advanced;
# `ordered = TRUE` makes R treat that order as meaningful.
skill_levels <- factor(c("Beginner", "Intermediate", "Advanced", "Beginner", "Advanced"),
                      levels = c("Beginner", "Intermediate", "Advanced"),
                      ordered = TRUE)

# 3. Group ages
# The requested groups are "Young" (<30), "Middle" (30-45), "Senior" (>45):
# 30 belongs to "Middle" and 45 also belongs to "Middle". cut() closes every
# interval on the same side, so cut(ages, breaks = c(0, 30, 45, 100)) would
# place age 30 in "Young". Explicit comparisons reproduce the exact boundaries.
ages <- c(18, 25, 30, 35, 40, 45, 50)
age_groups <- factor(
  ifelse(ages < 30, "Young",
         ifelse(ages <= 45, "Middle", "Senior")),
  levels = c("Young", "Middle", "Senior")
)

# 4. Frequency table
table(levels_factor)

1.5 Exercise 4: Data Frames and Basic Operations

# Question 1: Create a data frame with columns: Name, Age, City, and Salary
# Question 2: Add a new column called "Age_Group" based on age (<30="Young", 30-50="Middle", >50="Senior")
# Question 3: Find the mean salary by age group
# Question 4: Select only people from a specific city

# Write your code here:
Solution
# Solutions:
# Q1: five employees, one row each
employees <- data.frame(
  Name   = c("Alice", "Bob", "Charlie", "Diana", "Eve"),
  Age    = c(25, 35, 42, 28, 55),
  City   = c("NYC", "LA", "Chicago", "NYC", "Boston"),
  Salary = c(50000, 65000, 75000, 55000, 80000)
)

# Q2: label each row by age band: under 30, 30 through 50, over 50
employees$Age_Group <- with(
  employees,
  ifelse(Age < 30, "Young", ifelse(Age <= 50, "Middle", "Senior"))
)

# Q3: average salary within each age band
aggregate(Salary ~ Age_Group, data = employees, FUN = mean)

# Q4: keep only the rows whose City is "NYC"
nyc_employees <- employees[employees[["City"]] == "NYC", ]

1.6 Exercise 5: Data Manipulation with dplyr

# Question 1: Load the dplyr package and create a sample data frame
# Question 2: Use filter() to select rows where age > 30
# Question 3: Use select() to keep only Name and Salary columns
# Question 4: Use mutate() to create a new column "Salary_K" (salary in thousands)
# Question 5: Use group_by() and summarize() to find mean salary by city

# Write your code here:
Solution
# Solutions:
library(dplyr)

# Q1: a small data frame to practise on
sample_data <- data.frame(
  Name   = c("John", "Jane", "Mike", "Sarah", "Tom"),
  Age    = c(25, 32, 28, 35, 29),
  City   = c("NYC", "LA", "NYC", "Chicago", "LA"),
  Salary = c(45000, 60000, 52000, 70000, 58000)
)

# Q2: filter() keeps the rows that satisfy the condition
older_employees <- filter(sample_data, Age > 30)

# Q3: select() keeps the named columns
name_salary <- select(sample_data, Name, Salary)

# Q4: mutate() appends a column computed from existing ones
sample_data_with_k <- mutate(sample_data, Salary_K = Salary / 1000)

# Q5: one summary row per city; .groups = "drop" returns an ungrouped result
by_city <- group_by(sample_data, City)
city_salary_summary <- summarize(
  by_city,
  Mean_Salary = mean(Salary),
  Count = n(),
  .groups = "drop"
)
rm(by_city)

1.7 Exercise 6: Data Reshaping (Wide to Long)

# Question 1: Create a wide format data frame with test scores at three time points
# Question 2: Reshape the data from wide to long format
# Question 3: Calculate the mean score for each time point
# Question 4: Create a new column showing score improvement from Time1 to Time3

# Write your code here:
Solution
# Solutions:
# pivot_longer() comes from tidyr; mutate(), group_by() and summarize() come
# from dplyr. Load both so this solution runs on its own.
library(dplyr)
library(tidyr)

# 1. Create wide format data
# One row per person, one column per measurement occasion.
wide_data <- data.frame(
  ID = 1:5,
  Score_Time1 = c(75, 82, 68, 90, 78),
  Score_Time2 = c(78, 85, 72, 92, 81),
  Score_Time3 = c(82, 88, 76, 95, 85)
)

# 2. Reshape to long format
# Each Score_Time* column becomes rows: the column name goes to `Time` and the
# cell value goes to `Score`. `names_prefix` strips the leading "Score_Time",
# so `Time` holds the character values "1", "2" and "3".
long_data <- wide_data |>
  pivot_longer(
    cols = starts_with("Score_"),
    names_to = "Time",
    names_prefix = "Score_Time",
    values_to = "Score"
  )

# 3. Mean score by time
time_means <- long_data |>
  group_by(Time) |>
  summarize(Mean_Score = mean(Score), .groups = "drop")

# 4. Score improvement
# A difference between two columns is easiest to compute in the wide format.
improvement_data <- wide_data |>
  mutate(Improvement = Score_Time3 - Score_Time1)

1.8 Exercise 7: Basic Statistics and Visualization

# Question 1: Create a numeric vector and calculate basic statistics (mean, median, sd, min, max)
# Question 2: Create a histogram of your data
# Question 3: Create a boxplot comparing two groups
# Question 4: Create a scatter plot with two variables

# Write your code here:
Solution
# Solutions:
# Q1: fifteen observations and their basic descriptive statistics
data_vector <- c(
  12, 15, 18, 22, 25,
  28, 30, 32, 35, 38,
  40, 42, 45, 48, 50
)

mean(data_vector)
median(data_vector)
sd(data_vector)
min(data_vector)
max(data_vector)
summary(data_vector)

# Q2: histogram of the observations
hist(
  data_vector,
  main = "Distribution of Data",
  xlab = "Values",
  col = "lightblue"
)

# Q3: side-by-side boxplots, one per group
group1 <- c(12, 15, 18, 22, 25)
group2 <- c(35, 38, 40, 42, 45)
boxplot(
  group1, group2,
  names = c("Group 1", "Group 2"),
  main = "Comparison of Groups"
)

# Q4: scatter plot of each observation against its position (1 to 15)
x_values <- seq_along(data_vector)
y_values <- data_vector
plot(
  x_values, y_values,
  main = "Scatter Plot",
  xlab = "X",
  ylab = "Y",
  pch = 16
)

1.9 Exercise 8: Control Structures and Functions

# Question 1: Write an if-else statement to classify scores as "Pass" (>=60) or "Fail" (<60)
# Question 2: Use a for loop to calculate the square of numbers 1 to 10
# Question 3: Use sapply() to calculate the square root of numbers 1 to 10
# Question 4: Write a function that calculates the area of a circle given the radius

# Write your code here:
Solution
# Solutions:
# Q1: if/else is an expression in R, so its value can be assigned directly
score <- 75
result <- if (score >= 60) "Pass" else "Fail"

# Q2: fill a preallocated vector so that position i holds i squared
squares <- numeric(10)
for (i in seq_along(squares)) {
  squares[i] <- i^2
}

# Q3: sapply() calls sqrt() once per element and collects the results
numbers <- 1:10
square_roots <- sapply(numbers, sqrt)

# Q4: area of a circle with the given radius (pi * r^2)
circle_area <- function(radius) {
  pi * radius^2
}

# Try the function with radius 5
circle_area(5)

1.10 Exercise 9: Working with Missing Data

# Question 1: Create a vector with some NA values
# Question 2: Count how many NA values are in your vector
# Question 3: Remove NA values from your vector
# Question 4: Replace NA values with the mean of non-NA values

# Write your code here:
Solution
# Solutions:
# Q1: ten values, three of them missing
data_with_na <- c(1, 2, NA, 4, 5, NA, 7, 8, NA, 10)

# Q2: is.na() marks each missing element TRUE; summing counts the TRUEs
na_count <- sum(is.na(data_with_na))

# Q3: drop the missing values
# na.omit() also records which positions were removed (attribute "na.action")
data_clean <- na.omit(data_with_na)
# alternative: keep only the complete (non-missing) elements
data_clean2 <- data_with_na[complete.cases(data_with_na)]

# Q4: fill each missing value with the mean of the observed values
data_imputed <- replace(
  data_with_na,
  is.na(data_with_na),
  mean(data_with_na, na.rm = TRUE)
)

1.11 Exercise 10: Comprehensive Data Analysis

# Question 1: Create a comprehensive dataset with multiple variables
# Question 2: Perform exploratory data analysis (summary statistics, visualizations)
# Question 3: Create new variables through transformations
# Question 4: Group the data and calculate summary statistics by group

# Write your code here:
Solution
# Solutions:
# group_by(), summarize() and n() come from dplyr; load it so this solution
# runs on its own.
library(dplyr)

# 1. Create comprehensive dataset
# The columns are drawn with sample(). Fixing the seed first makes every run
# produce the same data, so results can be compared and reproduced.
set.seed(123)
comprehensive_data <- data.frame(
  ID = 1:20,
  Age = sample(18:65, 20, replace = TRUE),
  Gender = sample(c("Male", "Female"), 20, replace = TRUE),
  Education = sample(c("High School", "College", "Graduate"), 20, replace = TRUE),
  Income = sample(30000:100000, 20, replace = TRUE),
  Satisfaction = sample(1:10, 20, replace = TRUE)
)

# 2. Exploratory data analysis
# summary() gives per-column summaries; str() shows each column's type.
summary(comprehensive_data)
str(comprehensive_data)

# Visualizations
hist(comprehensive_data$Age, main = "Age Distribution")
boxplot(Income ~ Gender, data = comprehensive_data, main = "Income by Gender")

# 3. Create new variables
# cut() intervals are right-closed by default: (0, 30], (30, 50], (50, 100].
comprehensive_data$Age_Group <- cut(comprehensive_data$Age,
                                   breaks = c(0, 30, 50, 100),
                                   labels = c("Young", "Middle", "Senior"))

# Income expressed in thousands
comprehensive_data$Income_K <- comprehensive_data$Income / 1000

# 4. Group analysis
# One row per Gender x Education combination present in the data;
# .groups = "drop" returns an ungrouped result.
group_summary <- comprehensive_data |>
  group_by(Gender, Education) |>
  summarize(
    Mean_Income = mean(Income),
    Mean_Satisfaction = mean(Satisfaction),
    Count = n(),
    .groups = "drop"
  )

Back to top