Hello,
I am using Surrogates.jl v6.3.0 to build a Kriging surrogate for a dataset of 500 samples, with 24 input variables and 1 output. However, the accuracy is terrible and the model overfits badly.
using CSV
using DataFrames
using Plots
using Surrogates
using SurrogatesPolyChaos
using StatsPlots
using Statistics
default()
using Dates
using LinearAlgebra
using Lathe.preprocess: TrainTestSplit
df = DataFrame(CSV.File("result.csv"))
train, test = TrainTestSplit(df, 0.8)
dim = 24
# Inputs: first 24 columns, converted to the vector-of-tuples layout Surrogates expects;
# output: last column
x_train = values.(eachrow(train[:, 1:dim]))
y_train = train[:, end]
x_test = values.(eachrow(test[:, 1:dim]))
y_test = test[:, end]
lower_bound = [
200, 200, 200, 200, 200, 200,
200, 200, 200, 200, 200, 200,
180, 180, 180, 180, 180, 180,
300, 300, 300, 300, 300, 300
]
upper_bound = [
230, 230, 230, 230, 230, 230,
275, 275, 275, 275, 275, 275,
200, 200, 200, 200, 200, 200,
350, 350, 350, 350, 350, 350
]
p = ones(dim) * 2
# theta follows the formula suggested for the Kriging hyperparameters
theta = [0.5 / max(1e-6 * norm(upper_bound .- lower_bound), std(x_i[i] for x_i in x_train))^p[i] for i in 1:dim]
mymodel = Kriging(x_train, y_train, lower_bound, upper_bound, p = p, theta = theta)
# Prediction
ys_test = mymodel.(x_test)
ys_train = mymodel.(x_train)
# Model assessment criteria (x = predictions, y = true values)
function mae(x, y)
    return sum(abs.(x - y)) / length(x)
end
function mape(x, y)
    return sum(abs.((x - y) ./ y)) / length(x)
end
function rmse(x, y)
    return sqrt(sum((x - y).^2) / length(x))
end
function mse(x, y)
    return sum((x - y).^2) / length(x)
end
function r2(x, y)
    sse = sum((x - y).^2)
    sst = sum((y .- mean(y)).^2)
    return 1 - sse / sst
end
println(" ASSESSMENT CRITERIA TRAIN TEST")
println(" Mean Absolute Error ", mae(ys_train, y_train), " ", mae(ys_test, y_test))
println("Mean Absolute Percentage Error ", mape(ys_train, y_train), " ", mape(ys_test, y_test))
println(" Root Mean Square Error ", rmse(ys_train, y_train), " ", rmse(ys_test, y_test))
println(" Mean Square Error ", mse(ys_train, y_train), " ", mse(ys_test, y_test))
println(" R Square ", r2(ys_train, y_train), " ", r2(ys_test, y_test))
Following #368, I calculated the loss function, and it comes out as Inf!
function kriging_logpdf(params, x, y, lb, ub)
    d = length(params) ÷ 2
    theta = params[1:d]
    p = params[d+1:end]
    surr = Kriging(x, y, lb, ub; p, theta)

    n = length(y)
    y_minus_1μ = y .- surr.mu
    Rinv = surr.inverse_of_R

    term1 = only(-(y_minus_1μ' * Rinv * y_minus_1μ) / 2 / surr.sigma)
    term2 = -log((2π * surr.sigma)^(n / 2) / sqrt(det(Rinv)))

    return term1 + term2
end
loss_func = params -> -kriging_logpdf(params, x_train, y_train, lower_bound, upper_bound)
loss_func([theta; p])
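I wonder whether the Inf comes from (2π * surr.sigma)^(n/2) overflowing Float64 (with roughly 400 training points the exponent is about 200), or from det(Rinv) under-/overflowing for a 400×400 correlation matrix; the log of an overflowed value is then Inf. Below is a sketch of the same log-density kept entirely in log space. It assumes surr.inverse_of_R is positive definite so logdet applies, and the name kriging_logpdf_stable is just mine:
# Sketch: same log-likelihood as above, but evaluated in log space so that
# (2π * sigma)^(n/2) and det(Rinv) are never formed explicitly.
function kriging_logpdf_stable(params, x, y, lb, ub)
    d = length(params) ÷ 2
    theta = params[1:d]
    p = params[d+1:end]
    surr = Kriging(x, y, lb, ub; p, theta)

    n = length(y)
    y_minus_1μ = y .- surr.mu
    Rinv = surr.inverse_of_R

    term1 = -(y_minus_1μ' * Rinv * y_minus_1μ) / 2 / surr.sigma
    # -log((2πσ)^(n/2) / sqrt(det(Rinv)))  ==  -(n/2)*log(2πσ) + logdet(Rinv)/2
    term2 = -(n / 2) * log(2π * surr.sigma) + logdet(Rinv) / 2
    return term1 + term2
end
loss_func_stable = params -> -kriging_logpdf_stable(params, x_train, y_train, lower_bound, upper_bound)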
I don't know how to improve my model, since p and theta are already set to the suggested values.
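My rough plan, once the loss is finite, was to tune theta and p by minimizing it under box constraints with Optim.jl, roughly as in the sketch below. This is untested; the search range for theta is a guess, and I start p slightly inside its usual upper limit of 2 so the box-constrained solver has an interior starting point:
# Sketch: box-constrained maximum-likelihood tuning of [theta; p] with Optim.jl.
# Gradients of the loss are taken by finite differences (Optim's default here).
using Optim

theta_lb = fill(1e-6, dim);  theta_ub = fill(1e2, dim)   # guessed range for theta
p_lb     = fill(1.0, dim);   p_ub     = fill(2.0, dim)   # usual range for the exponent

x0  = [theta; fill(1.99, dim)]          # start strictly inside the box
res = optimize(loss_func, [theta_lb; p_lb], [theta_ub; p_ub], x0,
               Fminbox(LBFGS()))        # or loss_func_stable from the sketch above

theta_opt = Optim.minimizer(res)[1:dim]
p_opt     = Optim.minimizer(res)[dim+1:end]
mymodel_opt = Kriging(x_train, y_train, lower_bound, upper_bound,
                      p = p_opt, theta = theta_opt)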