Skip to content
Snippets Groups Projects
Commit 0c17897c authored by Felix Ramnelöv's avatar Felix Ramnelöv
Browse files

Lab 1: Assignment 1 done

parent 29bd9165
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,9 @@ library(dplyr)
library(tidyr)
library(kknn)
library(ggplot2)
library(DPpack)
# 1. Import data
# Read the digits table (the CSV has no header row) and record its row count.
data <- read.csv("optdigits.csv", header = FALSE)
n <- nrow(data)
......@@ -24,26 +26,32 @@ missclass=function(X,Xfit){
}
# NOTE(review): this span looks like a rendered commit diff, so the earlier
# statements (`model`, `fitted`, `confusion_matrix`,
# `train_missclassification <- ...`) appear next to their replacements
# (`*_train` / `*_test`). Confirm which set is meant to remain.
# Create model from training data
# k-nearest-neighbour classifiers with k = 30 and a rectangular kernel. The
# response V65 is treated as a factor and all other columns are predictors.
# `model` / `model_train` predict the training rows themselves; `model_test`
# is fitted on `train` and predicts the rows of `test`.
model <- kknn(as.factor(V65)~., train, train, k=30, kernel="rectangular")
model_train <- kknn(as.factor(V65)~., train, train, k=30, kernel="rectangular")
model_test <- kknn(as.factor(V65)~., train, test, k=30, kernel="rectangular")
# Get fitted values
# Predicted class for each row that was passed as the prediction set.
fitted <- model$fitted.values
fitted_train <- model_train$fitted.values
fitted_test <- model_test$fitted.values
# Create confusion matrix
# Rows are the true labels (V65), columns are the predicted labels.
confusion_matrix <- table(train$V65,fitted)
confusion_matrix_train <- table(train$V65,fitted_train)
confusion_matrix_test <- table(test$V65,fitted_test)
print(confusion_matrix)
print(confusion_matrix_train)
print(confusion_matrix_test)
# Get missclassification rate for the model
# `missclass` is defined earlier in the file (its body is not fully visible
# here); it is called with the true labels first and the predictions second.
train_missclassification <- missclass(train$V65,fitted)
missclass_train <- missclass(train$V65,fitted_train)
missclass_test <- missclass(test$V65,fitted_test)
print(train_missclassification)
print(missclass_train)
print(missclass_test)
# Get all cases where the target is 8
# Row indices into `train` whose true label equals "8".
digit_8_cases <- which(train$V65 == "8")
# Get the probabilities for the cases
# Column "8" of the model's `prob` matrix, restricted to those rows.
probs_digit_8 <- model$prob[digit_8_cases, "8"]
probs_digit_8 <- model_train$prob[digit_8_cases, "8"]
# Sorting by descending probability puts the most confidently recognised 8s
# first (two are kept); ascending order gives the three lowest-probability 8s.
easiest_cases <- digit_8_cases[order(-probs_digit_8)][1:2]
hardest_cases <- digit_8_cases[order(probs_digit_8)][1:3]
......@@ -80,8 +88,8 @@ print(train_missclassification)
print(valid_missclassification)
# Plot missclassification rates
# Validation rates are drawn in red and training rates in blue over the
# x-values 1..30; the y-axis runs from 0 to the largest validation rate.
# NOTE(review): two versions of the plot appear here (with `type="l"` the
# series are drawn as lines, without it as points) — this looks like the old
# and new side of a diff; confirm which pair should remain.
plot(1:30,valid_missclassification, ylim = c(0, max(valid_missclassification)), col="red", type="l")
points(1:30, train_missclassification, col="blue", type="l")
plot(1:30,valid_missclassification, ylim = c(0, max(valid_missclassification)), col="red")
points(1:30, train_missclassification, col="blue")
# Min classification rate for validation data
# Prints the index (position in the vector) of the smallest validation rate,
# presumably the corresponding k — verify against how the vector is filled.
print(which.min(valid_missclassification))
......@@ -93,4 +101,26 @@ test_missclassification <- missclass(test$V65,model_test$fitted.values)
# Missclassification rate for test data
print(test_missclassification)
# Mean cross-entropy loss between true class indicators and predicted
# class probabilities.
#
# Arguments:
#   X_true   Two-dimensional indicator (one-hot) table, one row per
#            observation and one column per class.
#   X_pred   Predicted class probabilities with the same layout as X_true.
#   epsilon  Small constant added to the probabilities before taking the log
#            so that a predicted probability of 0 does not give log(0) = -Inf.
#
# Returns a single number: -sum(X_true * log(X_pred + epsilon)) divided by the
# number of rows of X_true.
#
# Stops with an error if X_true has no row dimension (e.g. a plain vector);
# previously that case silently produced numeric(0).
cross_entropy <- function(X_true, X_pred, epsilon = 1e-15) {
  n_obs <- nrow(X_true)
  if (is.null(n_obs)) {
    stop("X_true must have one row per observation (matrix or data frame)",
         call. = FALSE)
  }
  -sum(X_true * log(X_pred + epsilon)) / n_obs
}
# Validation cross-entropy of the kNN classifier for k = 1, ..., 30.
valid_cross_entropy <- numeric(30)

# One-hot encoding of the true validation labels. It does not depend on k, so
# it is built once here instead of inside the loop.
# NOTE(review): assumes the columns of `$prob` are in the same class order as
# the columns produced by model.matrix() (i.e. every class occurs in `valid`)
# — TODO confirm.
X_true <- model.matrix(~ as.factor(valid$V65) - 1)

for (i in 1:30) {
  # Fit on the training data and predict class probabilities for `valid`.
  temp_model_valid <- kknn(
    as.factor(V65) ~ ., train, valid,
    k = i, kernel = "rectangular"
  )
  X_pred <- temp_model_valid$prob
  valid_cross_entropy[i] <- cross_entropy(X_true, X_pred)
}

# Cross-entropy as a function of k, followed by the k with the smallest value.
plot(
  1:30, valid_cross_entropy,
  ylim = c(0, max(valid_cross_entropy)), col = "red", type = "l"
)
print(which.min(valid_cross_entropy))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment