diff --git a/lab3/Lab3Block1_2021_SVMs_St.R b/lab3/Lab3Block1_2021_SVMs_St.R
index ee574c76913f87bfdfab31cf80711c83d5314bf3..a14d097fa634c679573811f7788a6378a2895082 100644
--- a/lab3/Lab3Block1_2021_SVMs_St.R
+++ b/lab3/Lab3Block1_2021_SVMs_St.R
@@ -54,16 +54,31 @@ err3
 
 # 3. Implementation of SVM predictions.
 
+# RBF kernel in the parametrisation kernlab's rbfdot uses:
+#   k(x_i, x_star) = exp(-sigma * ||x_i - x_star||^2)
+# NOTE(review): assumes filter3 was fitted on all rows of `spam` with
+# kernel = "rbfdot" and kpar = list(sigma = 0.05) - confirm against the ksvm()
+# call earlier in this file. The exp(-d^2 / (2 * sigma^2)) form is a different
+# kernel for the same sigma, so its sums would not match predict() below.
+gaussian_kernel <- function(x_i, x_star, sigma) {
+  exp(-sigma * sum((x_i - x_star)^2))
+}
+
 sv<-alphaindex(filter3)[[1]]
 co<-coef(filter3)[[1]]
 inte<- - b(filter3)
 k<-NULL
 for(i in 1:10){ # We produce predictions for just the first 10 points in the dataset.
-  k2<-NULL
+  k2 <- 0 # running sum of coefficient * kernel over the support vectors
+  test_point <- unlist(spam[i, -58])
   for(j in 1:length(sv)){
-    k2<- # Your code here
+    support_vector <- unlist(spam[sv[j], -58])
+    kernel_value <- gaussian_kernel(support_vector, test_point, sigma = 0.05)
+    k2 <- k2 + co[j] * kernel_value
   }
-  k<-c(k, # Your code here)
+  # Store the raw decision value so that k is directly comparable with
+  # predict(type = "decision") below; sign(k) gives the side of the boundary.
+  k <- c(k, k2 + inte)
 }
 k
-predict(filter3,spam[1:10,-58], type = "decision")
\ No newline at end of file
+predict(filter3,spam[1:10,-58], type = "decision")
diff --git a/lab3/assignment2.R b/lab3/assignment2.R
new file mode 100644
index 0000000000000000000000000000000000000000..d75dcc022b41cfeca4e6f4ff4ecfd2ee747ca2d1
--- /dev/null
+++ b/lab3/assignment2.R
@@ -0,0 +1,107 @@
+set.seed(1234567890)
+
+library(geosphere)
+
+stations <- read.csv("stations.csv", fileEncoding = "latin1")
+temps <- read.csv("temps50k.csv")
+
+st <- merge(stations, temps, by = "station_number")
+
+# Kernel bandwidths for physical distance, day distance and hour distance.
+h_distance <- 100000
+h_date <- 15
+h_time <- 4
+
+# Point (a = latitude, b = longitude), date and times to predict for.
+a <- 58.4274
+b <- 14.826
+date <- "1960-07-11"
+times <- c(
+  "04:00:00",
+  "06:00:00",
+  "08:00:00",
+  "10:00:00",
+  "12:00:00",
+  "14:00:00",
+  "16:00:00",
+  "18:00:00",
+  "20:00:00",
+  "22:00:00",
+  "24:00:00"
+)
+
+st <- st[st$date <= date, ] # Filter out posterior dates
+# Keep the parsed clock time as POSIXct; strptime() returns POSIXlt, which is
+# list-based and not suited to being a data frame column.
+st$time <- as.POSIXct(strptime(st$time, format = "%H:%M:%S"))
+
+# Choosing an appropriate smoothing coefficient for physical distance
+dist <- seq(0, 300000, by = 1000)
+plot(dist, exp(-dist^2 / (2 * h_distance^2)),
+     type = "l",
+     xlab = "Physical distance",
+     ylab = "Kernel")
+
+# Choosing an appropriate smoothing coefficient for days
+dist <- seq(0, 100)
+plot(dist, exp(-dist^2 / (2 * h_date^2)),
+     type = "l",
+     xlab = "Distance in days",
+     ylab = "Kernel")
+
+# Choosing an appropriate smoothing coefficient for hours
+dist <- seq(0, 18)
+plot(dist, exp(-dist^2 / (2 * h_time^2)),
+     type = "l",
+     xlab = "Distance in hours",
+     ylab = "Kernel")
+
+# Gaussian kernel weight for a distance `d` and bandwidth `h`.
+kernel_weight <- function(d, h) {
+  exp(-d^2 / (2 * h^2))
+}
+
+# Clock times as decimal hours. The target times are parsed by hand so that
+# "24:00:00" is kept as hour 24.
+target_hours <- vapply(strsplit(times, ":", fixed = TRUE), function(parts) {
+  sum(as.numeric(parts) / c(1, 60, 3600))
+}, numeric(1))
+obs_clock <- as.POSIXlt(st$time)
+obs_hours <- obs_clock$hour + obs_clock$min / 60 + obs_clock$sec / 3600
+
+# The distance and date kernels do not depend on the hour of day, so they are
+# computed once for every row. distHaversine() is vectorised and expects
+# (longitude, latitude) pairs, hence c(b, a) for the target point.
+distance_kernels <- kernel_weight(
+  distHaversine(cbind(st$longitude, st$latitude), c(b, a)),
+  h_distance
+)
+date_kernels <- kernel_weight(
+  as.numeric(as.Date(date) - as.Date(st$date)),
+  h_date
+)
+
+# Predicted temperature per entry of `times` (sum and product of kernels).
+temp_add <- numeric(length(times))
+temp_mult <- numeric(length(times))
+
+for (i in seq_along(times)) {
+  # Use earlier days, plus the target day up to and including the target time.
+  keep <- st$date < date | (st$date == date & obs_hours <= target_hours[i])
+
+  # Hour of day is circular: 23:00 and 01:00 are 2 hours apart, not 22.
+  hour_gap <- abs(target_hours[i] - obs_hours[keep])
+  time_kernels <- kernel_weight(pmin(hour_gap, 24 - hour_gap), h_time)
+
+  kernels_add <- distance_kernels[keep] + date_kernels[keep] + time_kernels
+  kernels_mult <- distance_kernels[keep] * date_kernels[keep] * time_kernels
+
+  # Kernel-weighted average of the observed temperatures.
+  temperature <- st$air_temperature[keep]
+  temp_add[i] <- sum(kernels_add * temperature) / sum(kernels_add)
+  temp_mult[i] <- sum(kernels_mult * temperature) / sum(kernels_mult)
+}
+
+# Forecast curves over `times` for the additive and multiplicative kernels.
+plot(temp_add, type = "o")
+plot(temp_mult, type = "o")
\ No newline at end of file