Commit 8a1cea42 authored by Felix Ramnelöv

Lab 1: Code and graph cleanup for assignment 1

parent da59abd1
library(kknn)
# ----1.----
# Import data
data = read.csv("optdigits.csv", header = FALSE)
n = dim(data)[1]
set.seed(12345)
id = sample(1:n, floor(n * 0.5))
train = data[id, ]
id1 = setdiff(1:n, id)
set.seed(12345)
id2 = sample(id1, floor(n * 0.25))
valid = data[id2, ]
id3 = setdiff(id1, id2)
test = data[id3, ]
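# Added sanity check (illustrative, not part of the original lab script):
# the three index sets should partition all n rows exactly once.
stopifnot(length(id) + length(id2) + length(id3) == n)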
# Misclassification rate
missclass = function(X, Xfit) {
  n = length(X)
  return(1 - sum(diag(table(X, Xfit))) / n)
}
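# Quick illustrative check of missclass() (added example, not from the original script):
# one wrong label out of four should give a rate of 0.25.
# missclass(c(1, 2, 3, 1), c(1, 2, 3, 3))  # 0.25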
# ----2.----
# Create model from training data
model_train <- kknn(as.factor(V65) ~ .,
                    train,
                    train,
                    k = 30,
                    kernel = "rectangular")
model_test <- kknn(as.factor(V65) ~ ., train, test, k = 30, kernel = "rectangular")
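# Note on kknn(): the second data argument is the training set and the third is the
# set to predict on; $fitted.values holds the predicted class for each row of that
# third argument and $prob the matrix of class probabilities (one column per digit).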
# Get fitted values
fitted_train <- model_train$fitted.values
fitted_test <- model_test$fitted.values
# Create confusion matrices
confusion_matrix_train <- table(train$V65, fitted_train)
confusion_matrix_test <- table(test$V65, fitted_test)
print(confusion_matrix_train)
print(confusion_matrix_test)
# Get misclassification rates for the model
missclass_train <- missclass(train$V65, fitted_train)
missclass_test <- missclass(test$V65, fitted_test)
print(missclass_train)
print(missclass_test)
# ----3.----
# Get all cases where the target is 8
digit_8_cases <- which(train$V65 == "8")
# Predicted probability of class "8" for each of these cases; the most confident
# cases are the easiest to classify, the least confident the hardest
probs_digit_8 <- model_train$prob[digit_8_cases, "8"]
easiest_cases <- digit_8_cases[order(probs_digit_8, decreasing = TRUE)][1:2]
hardest_cases <- digit_8_cases[order(probs_digit_8)][1:3]
# Plot case from data
plot_case = function(case, data) {
  digit_matrix <- matrix(as.numeric(data[case, -ncol(data)]),
                         nrow = 8,
                         ncol = 8,
                         byrow = TRUE)
  heatmap(
    x = digit_matrix,
    Colv = NA,
    Rowv = NA,
    scale = "none",
    main = paste("Digit 8 - Case:", case)
  )
}
for (case in easiest_cases) {
  plot_case(case, train)
}

for (case in hardest_cases) {
  plot_case(case, train)
}
# ----4.----
# Initialize numeric vectors for misclassification rates
train_missclassification <- numeric(30)
valid_missclassification <- numeric(30)
for (i in 1:30) {
  temp_model_train <- kknn(as.factor(V65) ~ .,
                           train,
                           train,
                           k = i,
                           kernel = "rectangular")
  temp_model_valid <- kknn(as.factor(V65) ~ .,
                           train,
                           valid,
                           k = i,
                           kernel = "rectangular")
  train_missclassification[i] <- missclass(train$V65, temp_model_train$fitted.values)
  valid_missclassification[i] <- missclass(valid$V65, temp_model_valid$fitted.values)
}
print(train_missclassification)
print(valid_missclassification)
# Plot misclassification rates for training and validation data against k
plot(
  1:30,
  valid_missclassification,
  ylim = c(0, max(valid_missclassification)),
  col = "blue",
  type = "l",
  xlab = "k",
  ylab = "Misclassification rate"
)
grid()
points(1:30,
       train_missclassification,
       col = "red",
       type = "l")
# k with the minimum misclassification rate on the validation data
print(which.min(valid_missclassification))
# Estimate the test error using the best k found on the validation data
model_test <- kknn(
  as.factor(V65) ~ .,
  train,
  test,
  k = which.min(valid_missclassification),
  kernel = "rectangular"
)
test_missclassification <- missclass(test$V65, model_test$fitted.values)
# Misclassification rate for test data
print(test_missclassification)
# ----5.----
# Average cross-entropy loss
cross_entropy = function(X_true, X_pred, epsilon = 1e-15) {
  n = length(X_true)
  return(-sum(X_true * log(X_pred + epsilon)) / n)
}
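# Note: with a one-hot matrix X_true and a probability matrix X_pred, the sum above is
# sum over i and c of y[i, c] * log(p[i, c] + epsilon). length(X_true) counts all rows
# times columns of the matrix, so this is a per-entry mean; dividing by nrow(X_true)
# would give the per-observation mean instead. Either scaling selects the same k below,
# since which.min() is unaffected by a constant factor.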
valid_cross_entropy <- numeric(30)
for (i in 1:30) {
  temp_model_valid <- kknn(as.factor(V65) ~ .,
                           train,
                           valid,
                           k = i,
                           kernel = "rectangular")
  # One-hot encoded matrix of target classes (avoids looping over classes)
  X_true <- model.matrix(~ as.factor(valid$V65) - 1)
  X_pred <- temp_model_valid$prob
  valid_cross_entropy[i] = cross_entropy(X_true, X_pred)
}
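# Illustrative example (added, with a made-up vector): model.matrix(~ as.factor(y) - 1)
# expands a class vector into indicator columns, e.g.
#   model.matrix(~ as.factor(c(0, 1, 1)) - 1)
# yields a 3 x 2 matrix with rows (1, 0), (0, 1), (0, 1), i.e. one column per class
# and a single 1 per row marking the true class.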
plot(
  1:30,
  valid_cross_entropy,
  ylim = c(0, max(valid_cross_entropy)),
  col = "blue",
  type = "l",
  ylab = "Average cross-entropy loss",
  xlab = "k"
)
grid()
# k with the minimum average cross-entropy on the validation data
print(which.min(valid_cross_entropy))