Skip to content
Snippets Groups Projects
Commit 51a842e9 authored by Felix Ramnelöv's avatar Felix Ramnelöv
Browse files

Lab 1: Code cleanup

parent 5d196428
No related branches found
No related tags found
No related merge requests found
library(caret)
# ----1.----
# Load the Parkinson's telemonitoring data set.
data = read.csv("parkinsons.csv")
# Split data 60/40 into training and test (fixed seed for reproducibility)
n=dim(data)[1]
set.seed(12345)
id=sample(1:n, floor(n*0.6))
train=data[id,]
test=data[-id,]
# Scale data: scaler is fitted on the training set only, then applied to both
scaler <- preProcess(train)
trainS <- predict(scaler,train)
testS <- predict(scaler,test)
# NOTE(review): column 6 taken as the response here, but column 5 is used
# below for y_train -- confirm the column order of parkinsons.csv
trainY <- trainS[,6]
# Linear model for motor_UPDRS from the voice features only: exclude
# total_UPDRS and the metadata columns, and drop the intercept (-1)
model <- lm(motor_UPDRS~.-total_UPDRS-subject.-age-sex-test_time-1, data=trainS)
summary <- summary(model)
print(summary)
coefficients <- summary$coefficients
# Scaled copies under alternative names, used by the later sections
train_scaled <- predict(scaler,train)
test_scaled <- predict(scaler,test)
# Order the coefficient table by p-value (column 4 of summary$coefficients)
sorted_coefficients <- coefficients[order(coefficients[, 4]), ]
# Response (assumed column 5) and feature matrix (columns 7 onward)
y_train <- train_scaled[,5]
X_train <- train_scaled[,-(1:6)]
y_test <- test_scaled[,5]
X_test <- test_scaled[,-(1:6)]
print(sorted_coefficients)
# ----2.----
# Mean squared error between observations y and predictions y_hat.
# (Diff residue had nested a duplicate definition inside an unclosed
# `mse = function` wrapper, which was a syntax error.)
mse <- function(y, y_hat) {
  mean((y_hat - y)^2)
}
# Fit a no-intercept linear model on the scaled training data using all
# voice features to predict motor_UPDRS, then report train/test MSE.
# (Diff residue previously fitted the model and printed MSE twice.)
feature_cols <- colnames(X_train)
target_col <- colnames(train_scaled[5])  # response column name (motor_UPDRS)
formula <- as.formula(paste(target_col, "~", paste(feature_cols, collapse = " + "), "-1"))
model <- lm(formula, data = train_scaled)
model_summary <- summary(model)
# ML estimates, reused by the likelihood-based ridge in section 3
theta <- model_summary$coefficients[, 1]
sigma <- model_summary$sigma
train_pred <- predict(model, train_scaled)
test_pred <- predict(model, test_scaled)
print(paste("MSE on the training data:", mse(train_scaled$motor_UPDRS, train_pred)))
print(paste("MSE on the test data:", mse(test_scaled$motor_UPDRS, test_pred)))
# ----3.----
# Log-likelihood of the no-intercept Gaussian linear model
# y = X %*% theta + eps, eps ~ N(0, sigma^2).
#
# theta : coefficient vector
# sigma : residual standard deviation
# y, X  : data; default to the scaled training data from section 1
#         (defaults are lazily evaluated, so explicit arguments need no globals)
#
# Returns the scalar log-likelihood. The earlier body wrote the normalising
# term as -n/2 * log(sqrt(2*pi)*sigma), which under-counts it by a factor
# of 2; the correct term is -n/2 * log(2*pi*sigma^2).
log_likelihood <- function(theta, sigma, y = y_train, X = as.matrix(X_train)) {
  n <- length(y)
  rss <- sum((y - X %*% theta)^2)
  -n / 2 * log(2 * pi * sigma^2) - rss / (2 * sigma^2)
}
# ML estimates taken from the section-1 model, and the resulting
# log-likelihood on the training data
theta <- coefficients[, 1]
sigma <- summary$sigma
ll_train <- log_likelihood(theta, sigma)
# Ridge objective: the negative penalised log-likelihood,
# lambda * ||theta||^2 - loglik(theta, sigma). Minimising this is
# equivalent to maximising the penalised likelihood.
ridge <- function(theta, sigma, lambda) {
  penalty <- lambda * sum(theta^2)
  penalty - log_likelihood(theta, sigma)
}
# RidgeOpt: minimise the ridge objective jointly over (theta, sigma) with
# BFGS, starting from the ML estimates of the section-1 model.
# Returns the full optim() result; $par holds the 16 coefficients followed
# by sigma as element 17.
# (Diff residue had left a raw hunk header inside the body and a duplicated
# `par =` argument in the optim() call, both syntax errors; the line
# extracting sigma from the parameter vector was swallowed by the hunk.)
ridge_opt <- function(lambda) {
  # optim works on one flat vector: coefficients first, sigma last
  objective_function <- function(params) {
    theta <- params[1:(length(params) - 1)]
    sigma <- params[length(params)]
    return(ridge(theta, sigma, lambda))
  }
  initial_params <- c(coefficients[, 1], summary$sigma)
  return(optim(
    par = initial_params,
    fn = objective_function,
    method = "BFGS"
  ))
}
# Effective degrees of freedom of ridge regression with penalty lambda:
# the trace of the hat matrix X (X'X + lambda I)^-1 X'.
# X defaults to the scaled training features (diff residue assigned X twice;
# the hard-coded global is now a backward-compatible default argument).
df <- function(lambda, X = as.matrix(X_train)) {
  P <- X %*% solve(t(X) %*% X + lambda * diag(ncol(X))) %*% t(X)
  sum(diag(P))
}
# ----4.----
# Ridge solutions for a range of penalties: report training and test MSE,
# then the effective degrees of freedom for each lambda.
# (Diff residue had left three partially duplicated loops; the surviving
# print statements also used train_pred/test_pred -- the unpenalised lm
# predictions from section 2 -- instead of the ridge predictions computed
# just above them. Fixed to use y_hat_train/y_hat_test.)
for (lambda in c(1, 100, 1000)) {
  # Optimised parameter vector; element 17 is sigma, so drop it
  theta_opti <- unlist(ridge_opt(lambda)$par[-17])
  names(theta_opti) <- NULL
  y_hat_train <- as.matrix(X_train) %*% theta_opti
  y_hat_test <- as.matrix(X_test) %*% theta_opti
  print(paste("MSE for lambda =", lambda, "on training data:", mse(y_train, y_hat_train)))
  print(paste("MSE for lambda =", lambda, "on test data:", mse(y_test, y_hat_test)))
}
for (lambda in c(1, 100, 1000)) {
  print(paste("DF for lambda =", lambda, ":", df(lambda)))
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment