This is part 3
of the project “Covid-19 Policy Decision Helper”. In this part, we try to model the relationship between the SEIR model parameters and a variety of social factors, including demographic, medical and policy factors. We aim at obtaining models that are interpretable as well as predictive; in other words, we are hoping to find models that are simple, accessible and easy to interpret and understand, so that people can gain some insight into what is significant to the way a pandemic develops, but at the same time, we are also striving to find, among those explainable models, the ones that are most helpful for making predictions.
With such goals in mind, we used a relatively small number of variables in our study: variables that seem intuitively significant to us, drawn from the most accessible open data sources.
The modeling methods we applied include the following:
Data source:
part 1
and part 2.
StateOfInterest = c("Arizona", "California", "Minnesota", "New Mexico", "New York",
"Oklahoma", "South Carolina", "Tennessee", "Utah", "Virginia",
"West Virginia", "Wisconsin")
library(readr)
# Read the county-level table from the working directory and report its size
# as (rows, columns).
county_data_abridged = read_csv(file = "county_data_abridged.csv")
dim(x = county_data_abridged)
## [1] 3244 87
names(county_data_abridged)
## [1] "countyFIPS" "STATEFP"
## [3] "COUNTYFP" "CountyName"
## [5] "StateName" "State"
## [7] "lat" "lon"
## [9] "POP_LATITUDE" "POP_LONGITUDE"
## [11] "CensusRegionName" "CensusDivisionName"
## [13] "Rural-UrbanContinuumCode2013" "PopulationEstimate2018"
## [15] "PopTotalMale2017" "PopTotalFemale2017"
## [17] "FracMale2017" "PopulationEstimate65+2017"
## [19] "PopulationDensityperSqMile2010" "CensusPopulation2010"
## [21] "MedianAge2010" "#EligibleforMedicare2018"
## [23] "MedicareEnrollment,AgedTot2017" "3-YrDiabetes2015-17"
## [25] "DiabetesPercentage" "HeartDiseaseMortality"
## [27] "StrokeMortality" "Smokers_Percentage"
## [29] "RespMortalityRate2014" "#FTEHospitalTotal2017"
## [31] "TotalM.D.'s,TotNon-FedandFed2017" "#HospParticipatinginNetwork2017"
## [33] "#Hospitals" "#ICU_beds"
## [35] "dem_to_rep_ratio" "PopMale<52010"
## [37] "PopFmle<52010" "PopMale5-92010"
## [39] "PopFmle5-92010" "PopMale10-142010"
## [41] "PopFmle10-142010" "PopMale15-192010"
## [43] "PopFmle15-192010" "PopMale20-242010"
## [45] "PopFmle20-242010" "PopMale25-292010"
## [47] "PopFmle25-292010" "PopMale30-342010"
## [49] "PopFmle30-342010" "PopMale35-442010"
## [51] "PopFmle35-442010" "PopMale45-542010"
## [53] "PopFmle45-542010" "PopMale55-592010"
## [55] "PopFmle55-592010" "PopMale60-642010"
## [57] "PopFmle60-642010" "PopMale65-742010"
## [59] "PopFmle65-742010" "PopMale75-842010"
## [61] "PopFmle75-842010" "PopMale>842010"
## [63] "PopFmle>842010" "3-YrMortalityAge<1Year2015-17"
## [65] "3-YrMortalityAge1-4Years2015-17" "3-YrMortalityAge5-14Years2015-17"
## [67] "3-YrMortalityAge15-24Years2015-17" "3-YrMortalityAge25-34Years2015-17"
## [69] "3-YrMortalityAge35-44Years2015-17" "3-YrMortalityAge45-54Years2015-17"
## [71] "3-YrMortalityAge55-64Years2015-17" "3-YrMortalityAge65-74Years2015-17"
## [73] "3-YrMortalityAge75-84Years2015-17" "3-YrMortalityAge85+Years2015-17"
## [75] "mortality2015-17Estimated" "stay at home"
## [77] ">50 gatherings" ">500 gatherings"
## [79] "public schools" "restaurant dine-in"
## [81] "entertainment/gym" "federal guidelines"
## [83] "foreign travel ban" "SVIPercentile"
## [85] "HPSAShortage" "HPSAServedPop"
## [87] "HPSAUnderservedPop"
# Rename the columns used later whose original names are not syntactic R
# identifiers (leading "#", ">", "+", "/", or embedded spaces), so they can be
# referenced unquoted in subset() and in model formulas.
# Lookup table: original column name -> replacement name.
renamed_columns = c(
  "#EligibleforMedicare2018"        = "EligibleforMedicare2018",
  "#FTEHospitalTotal2017"           = "FTEHospitalTotal2017",
  "#HospParticipatinginNetwork2017" = "HospParticipatinginNetwork2017",
  "#Hospitals"                      = "Hospitals",
  "#ICU_beds"                       = "ICU_beds",
  "PopulationEstimate65+2017"       = "PopulationEstimate_above65_2017",
  "stay at home"                    = "stay_at_home",
  ">50 gatherings"                  = "above_50_gatherings",
  ">500 gatherings"                 = "above_500_gatherings",
  "public schools"                  = "public_schools",
  "restaurant dine-in"              = "restaurant_dine_in",
  "entertainment/gym"               = "entertainment_gym",
  "federal guidelines"              = "federal_guidelines",
  "foreign travel ban"              = "foreign_travel_ban"
)
for (old_name in names(renamed_columns)) {
  # A name that is absent from the table simply matches nothing (no-op).
  is_old = colnames(county_data_abridged) == old_name
  colnames(county_data_abridged)[is_old] = renamed_columns[[old_name]]
}
# Keep only the identifier, location, population, health, hospital-capacity
# and policy columns used in the models, then drop every county that has a
# missing value in any of them.
model_columns = c(
  "State", "CountyName", "POP_LATITUDE", "POP_LONGITUDE",
  "PopulationEstimate2018", "PopTotalMale2017",
  "PopulationEstimate_above65_2017", "PopulationDensityperSqMile2010",
  "DiabetesPercentage", "Smokers_Percentage",
  "HeartDiseaseMortality", "StrokeMortality",
  "Hospitals", "ICU_beds", "HospParticipatinginNetwork2017",
  "stay_at_home", "above_50_gatherings", "above_500_gatherings",
  "restaurant_dine_in", "entertainment_gym"
)
data = county_data_abridged[, model_columns]
# na.omit() records the removed row indices in the "na.action" attribute;
# droplevels() discards factor levels that no longer occur.
data = droplevels(na.omit(data))
# Re-base every policy column so that its smallest observed value becomes 0.
# NOTE(review): the raw values are presumably ordinal dates on which each
# measure took effect -- confirm against the data dictionary.
policy_columns = c("stay_at_home", "above_50_gatherings",
                   "above_500_gatherings", "restaurant_dine_in",
                   "entertainment_gym")
for (policy in policy_columns) {
  data[[policy]] = data[[policy]] - min(data[[policy]])
}
str(data)
## tibble [2,585 × 20] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:2585] "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ CountyName : chr [1:2585] "Autauga" "Baldwin" "Barbour" "Bibb" ...
## $ POP_LATITUDE : num [1:2585] 32.5 30.5 31.8 33 34 ...
## $ POP_LONGITUDE : num [1:2585] -86.5 -87.8 -85.3 -87.1 -86.6 ...
## $ PopulationEstimate2018 : num [1:2585] 55601 218022 24881 22400 57840 ...
## $ PopTotalMale2017 : num [1:2585] 27007 103225 13335 12138 28607 ...
## $ PopulationEstimate_above65_2017: num [1:2585] 8392 42413 4757 3632 10351 ...
## $ PopulationDensityperSqMile2010 : num [1:2585] 91.8 114.7 31 36.8 88.9 ...
## $ DiabetesPercentage : num [1:2585] 9.9 8.5 15.7 13.3 14.9 22.4 16.9 15.6 17.5 12.2 ...
## $ Smokers_Percentage : num [1:2585] 18.1 17.5 22 19.1 19.2 ...
## $ HeartDiseaseMortality : num [1:2585] 204 183 220 226 225 ...
## $ StrokeMortality : num [1:2585] 56.1 41.9 49 57.2 52.8 54.1 59 44 45.2 47.3 ...
## $ Hospitals : num [1:2585] 1 3 1 1 1 1 1 2 0 1 ...
## $ ICU_beds : num [1:2585] 6 51 5 0 6 0 7 24 0 0 ...
## $ HospParticipatinginNetwork2017 : num [1:2585] 0 0 0 0 1 0 0 0 0 0 ...
## $ stay_at_home : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
## $ above_50_gatherings : num [1:2585] 5 5 5 5 5 5 5 5 5 5 ...
## $ above_500_gatherings : num [1:2585] 2 2 2 2 2 2 2 2 2 2 ...
## $ restaurant_dine_in : num [1:2585] 7 7 7 7 7 7 7 7 7 7 ...
## $ entertainment_gym : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
## - attr(*, "na.action")= 'omit' Named int [1:659] 68 69 70 71 72 73 74 75 76 77 ...
## ..- attr(*, "names")= chr [1:659] "68" "69" "70" "71" ...
# Keep the cleaned county-level table under its own name before `data` is
# reused for the case time series.
data_demographic_county = data

# Load the case time series and keep only US state-level rows.
# NOTE(review): the rendered str() output below shows a very large "problems"
# attribute in which the `state` column was read as logical; consider passing
# explicit `col_types` (or a larger `guess_max`) to read_csv(). The `state`
# column from the file is not relied on here -- it is rebuilt from `name`.
timeseries = read_csv("../datasets/timeseries.csv")
data = timeseries
data = subset(data, country == "United States" & level == "state")

# Drop pseudo-locations whose `name` is not of the form "<State>, US".
data = subset(data, !(name %in% c("Unassigned cases, Arkansas, US",
  "Unassigned cases, Georgia, US", "Unassigned cases, Illinois, US",
  "Unassigned cases, Iowa, US", "Unassigned cases, Maine, US",
  "Unassigned cases, Massachusetts, US", "Unassigned cases, North Dakota, US",
  "Washington, D.C., US")))

# State name = text before the first ", ". Splitting element by element keeps
# every row aligned with its own name; reshaping the flattened pieces into a
# two-column matrix would silently shift all later rows as soon as one name
# had a different number of parts.
data$state = vapply(strsplit(as.character(data$name), ", "),
                    function(parts) parts[1], character(1))

data = subset(data,
              select = c(state, date, cases, deaths, recovered))
data = subset(data, state %in% StateOfInterest)
data$state = as.factor(data$state)
data$cases = as.numeric(data$cases)
data$deaths = as.numeric(data$deaths)
data$recovered = as.numeric(data$recovered)

# Remove days on which any of the three counts is missing, then drop factor
# levels that no longer occur.
data = na.omit(data)
data = droplevels(data)
str(data)
## tibble [1,578 × 5] (S3: tbl_df/tbl/data.frame)
## $ state : Factor w/ 12 levels "Arizona","California",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Date[1:1578], format: "2020-04-14" "2020-04-15" ...
## $ cases : num [1:1578] 3806 3962 4234 4507 4719 ...
## $ deaths : num [1:1578] 131 142 150 169 177 184 187 208 229 249 ...
## $ recovered: num [1:1578] 249 385 460 539 539 ...
## - attr(*, "problems")= tibble [1,666,558 × 5] (S3: tbl_df/tbl/data.frame)
## ..$ row : int [1:1666558] 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 ...
## ..$ col : chr [1:1666558] "state" "state" "state" "state" ...
## ..$ expected: chr [1:1666558] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
## ..$ actual : chr [1:1666558] "Burgenland" "Burgenland" "Burgenland" "Burgenland" ...
## ..$ file : chr [1:1666558] "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" ...
## - attr(*, "na.action")= 'omit' Named int [1:894] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr [1:894] "1" "2" "3" "4" ...
range(data$cases)
## [1] 237 613076
range(data$deaths)
## [1] 1 25250
range(data$recovered)
## [1] 2 266287
data_cases_state = data
model_out = read_csv("../datasets/model_out.csv")
str(model_out)
## tibble [73 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ state : chr [1:73] "Arizona" "Arizona" "Arizona" "Arizona" ...
## $ startdate: Date[1:73], format: "2020-04-16" "2020-05-01" ...
## $ enddate : Date[1:73], format: "2020-04-30" "2020-05-15" ...
## $ k : num [1:73] 0.783 0.863 0.865 0.949 0.986 ...
## $ sigma : num [1:73] 0.0714 0.0714 0.0714 0.0714 0.0714 ...
## $ lamda : num [1:73] 0.0563 0.07 0.1266 0.0784 0.0925 ...
## $ c : num [1:73] 1 0 0.127 1 0 ...
## $ alpha : num [1:73] 0.06825 0.00721 0.06765 0.10224 0.05641 ...
## $ omega : num [1:73] 0.00275 0.003091 0.001429 0.001371 0.000698 ...
## $ miu : num [1:73] 0.0384 0.0175 0.0338 0.036 0.014 ...
## - attr(*, "spec")=
## .. cols(
## .. state = col_character(),
## .. startdate = col_date(format = ""),
## .. enddate = col_date(format = ""),
## .. k = col_double(),
## .. sigma = col_double(),
## .. lamda = col_double(),
## .. c = col_double(),
## .. alpha = col_double(),
## .. omega = col_double(),
## .. miu = col_double()
## .. )
# Attach to every fitted-parameter row (one per state and time window) the
# cumulative cases / deaths / recovered reported for that state on the
# window's start date. Rows without a matching observation are dropped.
data_parm = model_out
data = data_parm
n = length(data$state)

# Vectorized lookup on a "<state> <date>" key. Unlike a `for (i in 1:n)` row
# loop this also works when there are no parameter rows (n == 0), and it takes
# the first observation if a state/date pair were ever duplicated.
parm_key = paste(data$state, data$startdate)
case_key = paste(data_cases_state$state, data_cases_state$date)
hit = match(parm_key, case_key)

data$cases = data_cases_state$cases[hit]
data$deaths = data_cases_state$deaths[hit]
data$recovered = data_cases_state$recovered[hit]

# Unmatched windows carry NA counts and are removed here, together with any
# row that has a missing parameter value.
data = na.omit(data)
# Build a table with one row per row of `data`, holding the state-level value
# of every county covariate. Each value is the unweighted mean over that
# state's counties (so e.g. PopulationEstimate2018 is the mean county
# population, not the state total).
state_columns = setdiff(colnames(data_demographic_county), "CountyName")
data_demographic_state = as.data.frame(
  matrix(nrow = length(data$state), ncol = length(state_columns)))
colnames(data_demographic_state) = state_columns
data_demographic_state$State = data$state

covariate_columns = setdiff(state_columns, "State")
# Visit each state once: the means do not depend on the time window, so
# recomputing them for every row of `data` is redundant work. Columns are
# addressed by name instead of relying on a fixed positional offset between
# the county and state tables.
for (s in unique(data_demographic_state$State)) {
  county_rows = data_demographic_county$State == s
  state_rows = data_demographic_state$State == s
  for (covariate in covariate_columns) {
    data_demographic_state[state_rows, covariate] =
      mean(data_demographic_county[[covariate]][county_rows])
  }
}
# Append the state-level covariates (everything except the duplicate State
# key) to the parameter table, then drop rows with any missing value.
demographic_values =
  data_demographic_state[, colnames(data_demographic_state) != "State"]
data = na.omit(cbind(data, demographic_values))
# `days`: cumulative length, in days, of the fitted windows of each state up
# to and including the current row (rows are taken in their existing order).
# The window length is computed per row. Assigning the difference to the
# whole helper column inside a row loop would leave every row with the length
# of the LAST window, which is only correct when all windows are equally long.
window_days = as.numeric(difftime(data$enddate, data$startdate, units = "days"))
data$days = ave(window_days, as.character(data$state), FUN = cumsum)
# Modelling table: same rows as `data`, without the identifier columns
# (state name and window dates), so that `response ~ .` formulas only see
# numeric predictors.
id_columns = c("state", "startdate", "enddate")
data_unamed = data[, !(colnames(data) %in% id_columns)]
# head(data_demographic_county, 10)
# head(data_demographic_state, 10)
# head(data[, c("state", "startdate", "enddate", "days")], 10)
head(data, n = 10)
## state startdate enddate k sigma lamda c alpha
## 1 Arizona 2020-04-16 2020-04-30 0.7834 0.07143 0.05633 1.00000 0.06825
## 2 Arizona 2020-05-01 2020-05-15 0.8631 0.07143 0.07001 0.00000 0.00721
## 3 Arizona 2020-05-16 2020-05-30 0.8653 0.07143 0.12657 0.12709 0.06765
## 4 Arizona 2020-06-01 2020-06-15 0.9492 0.07143 0.07839 1.00000 0.10224
## 5 Arizona 2020-06-16 2020-06-30 0.9864 0.07143 0.09253 0.00000 0.05641
## 6 Arizona 2020-07-01 2020-07-15 0.9732 0.07143 0.06093 0.00000 0.00000
## 7 California 2020-04-16 2020-04-30 0.8574 0.07143 0.07211 0.07752 0.02728
## 8 California 2020-05-01 2020-05-15 0.9481 0.07143 0.02832 1.00000 0.01925
## 9 California 2020-05-16 2020-05-30 0.9598 0.07143 0.04972 0.40540 0.03497
## 10 California 2020-06-01 2020-06-15 1.1270 0.07143 0.10206 0.01176 0.03073
## omega miu cases deaths recovered POP_LATITUDE POP_LONGITUDE
## 1 0.0027505 0.038429 4234 150 460 33.58 -111.5
## 2 0.0030913 0.017520 7962 330 1528 33.58 -111.5
## 3 0.0014293 0.033751 13631 679 3357 33.58 -111.5
## 4 0.0013712 0.036006 20123 917 4869 33.58 -111.5
## 5 0.0006979 0.013973 39097 1219 6598 33.58 -111.5
## 6 0.0000000 0.003805 84092 1720 9715 33.58 -111.5
## 7 0.0023628 0.007923 28035 970 1753 37.82 -120.9
## 8 0.0013106 0.006909 52152 2131 5130 37.82 -120.9
## 9 0.0009157 0.012128 78704 3207 9098 37.82 -120.9
## 10 0.0006310 0.012709 115032 4219 17585 37.82 -120.9
## PopulationEstimate2018 PopTotalMale2017 PopulationEstimate_above65_2017
## 1 478110 232553 80116
## 2 478110 232553 80116
## 3 478110 232553 80116
## 4 478110 232553 80116
## 5 478110 232553 80116
## 6 478110 232553 80116
## 7 682018 338751 94920
## 8 682018 338751 94920
## 9 682018 338751 94920
## 10 682018 338751 94920
## PopulationDensityperSqMile2010 DiabetesPercentage Smokers_Percentage
## 1 52.05 10.060 16.48
## 2 52.05 10.060 16.48
## 3 52.05 10.060 16.48
## 4 52.05 10.060 16.48
## 5 52.05 10.060 16.48
## 6 52.05 10.060 16.48
## 7 663.26 8.505 12.09
## 8 663.26 8.505 12.09
## 9 663.26 8.505 12.09
## 10 663.26 8.505 12.09
## HeartDiseaseMortality StrokeMortality Hospitals ICU_beds
## 1 148.8 30.90 5.067 103.9
## 2 148.8 30.90 5.067 103.9
## 3 148.8 30.90 5.067 103.9
## 4 148.8 30.90 5.067 103.9
## 5 148.8 30.90 5.067 103.9
## 6 148.8 30.90 5.067 103.9
## 7 153.9 37.89 5.672 126.5
## 8 153.9 37.89 5.672 126.5
## 9 153.9 37.89 5.672 126.5
## 10 153.9 37.89 5.672 126.5
## HospParticipatinginNetwork2017 stay_at_home above_50_gatherings
## 1 1.800 12 2
## 2 1.800 12 2
## 3 1.800 12 2
## 4 1.800 12 2
## 5 1.800 12 2
## 6 1.800 12 2
## 7 1.845 0 4
## 8 1.845 0 4
## 9 1.845 0 4
## 10 1.845 0 4
## above_500_gatherings restaurant_dine_in entertainment_gym days
## 1 6 7 7 14
## 2 6 7 7 28
## 3 6 7 7 42
## 4 6 7 7 56
## 5 6 7 7 70
## 6 6 7 7 84
## 7 8 3 3 14
## 8 8 3 3 28
## 9 8 3 3 42
## 10 8 3 3 56
pairs() Plot
set.seed(42)
# Random 90% / 10% split of the modelling table into training and test rows.
# The split is only reproducible if the RNG seed has been set beforehand.
num_obs = nrow(data_unamed)                       # all observations
num_trn = round(0.90 * num_obs)                   # size of the training set
trn_idx = sample.int(num_obs, size = num_trn)     # row indices drawn for training
data_trn = data_unamed[trn_idx, ]                 # rows used for fitting
data_tst = data_unamed[-trn_idx, ]                # held-out rows for evaluation
k
# Full additive model for k: regress k on every other column of data_trn.
# Because of `.`, the other fitted SEIR parameters (sigma, lamda, c, alpha,
# omega, miu) enter as predictors alongside the case counts, the state-level
# covariates and `days`. The fit is rank-deficient (see the singularity note
# and the predict() warnings in the output below).
# NOTE(review): test_mod() is defined outside this excerpt; its `k` argument
# is presumably the column index of the response (k is column 1) -- confirm.
mod_k_full = lm(k ~ ., data = data_trn)
test_mod(mod_k_full, k = 1)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.221852 0.475015 0.016221 0.000191 29.000000 0.149099 14.731106
summary(mod_k_full)
##
## Call:
## lm(formula = k ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4647 -0.0452 0.0073 0.0503 0.2077
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.62e+01 3.93e+00 4.13 0.00019 ***
## sigma NA NA NA NA
## lamda -6.46e+00 1.20e+00 -5.41 3.5e-06 ***
## c -3.05e-01 7.17e-02 -4.26 0.00012 ***
## alpha 3.08e+00 1.02e+00 3.00 0.00465 **
## omega 2.32e+01 2.59e+01 0.89 0.37630
## miu 4.41e+00 1.99e+00 2.21 0.03272 *
## cases -4.57e-07 1.64e-06 -0.28 0.78232
## deaths 1.32e-05 1.91e-05 0.69 0.49356
## recovered -1.54e-06 6.60e-06 -0.23 0.81645
## POP_LATITUDE -2.01e-01 5.03e-02 -3.99 0.00028 ***
## POP_LONGITUDE 6.94e-02 1.69e-02 4.11 0.00019 ***
## PopulationEstimate2018 -1.13e-04 2.90e-05 -3.91 0.00036 ***
## PopTotalMale2017 2.15e-04 5.58e-05 3.85 0.00043 ***
## PopulationEstimate_above65_2017 7.20e-05 1.76e-05 4.10 0.00020 ***
## PopulationDensityperSqMile2010 2.45e-05 1.41e-04 0.17 0.86330
## DiabetesPercentage -8.36e-01 2.00e-01 -4.19 0.00015 ***
## Smokers_Percentage 3.32e-01 8.49e-02 3.91 0.00036 ***
## HeartDiseaseMortality -2.90e-02 9.27e-03 -3.13 0.00331 **
## StrokeMortality 1.61e-01 4.45e-02 3.62 0.00083 ***
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days 3.16e-03 1.46e-03 2.17 0.03613 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.136 on 39 degrees of freedom
## Multiple R-squared: 0.647, Adjusted R-squared: 0.475
## F-statistic: 3.76 on 19 and 39 DF, p-value: 0.000227
# Small additive model: k as a linear function of elapsed days only.
mod_k_1 = lm(k ~ days, data = data_trn)
# Large additive model: the full model minus the nine terms whose coefficients
# were not estimable there (hospital capacity, policy columns and sigma), so
# the fitted values are the same as for the full model but without NA
# coefficients.
mod_k_2 = lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
# NOTE(review): test_mod() is defined outside this excerpt.
test_mod(mod_k_1, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 1.867e-01 2.328e-02 9.083e-01 4.534e-13 2.000e+00 5.830e-02 5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.221852 0.475015 0.016221 0.000191 20.000000 0.149099 14.731106
summary(mod_k_1)
##
## Call:
## lm(formula = k ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2560 -0.0326 0.0395 0.0661 0.2381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.805555 0.054607 14.75 <2e-16 ***
## days 0.001488 0.000964 1.54 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.186 on 57 degrees of freedom
## Multiple R-squared: 0.0401, Adjusted R-squared: 0.0233
## F-statistic: 2.38 on 1 and 57 DF, p-value: 0.128
summary(mod_k_2)
##
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4647 -0.0452 0.0073 0.0503 0.2077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.62e+01 3.93e+00 4.13 0.00019 ***
## lamda -6.46e+00 1.20e+00 -5.41 3.5e-06 ***
## c -3.05e-01 7.17e-02 -4.26 0.00012 ***
## alpha 3.08e+00 1.02e+00 3.00 0.00465 **
## omega 2.32e+01 2.59e+01 0.89 0.37630
## miu 4.41e+00 1.99e+00 2.21 0.03272 *
## cases -4.57e-07 1.64e-06 -0.28 0.78232
## deaths 1.32e-05 1.91e-05 0.69 0.49356
## recovered -1.54e-06 6.60e-06 -0.23 0.81645
## POP_LATITUDE -2.01e-01 5.03e-02 -3.99 0.00028 ***
## POP_LONGITUDE 6.94e-02 1.69e-02 4.11 0.00019 ***
## PopulationEstimate2018 -1.13e-04 2.90e-05 -3.91 0.00036 ***
## PopTotalMale2017 2.15e-04 5.58e-05 3.85 0.00043 ***
## PopulationEstimate_above65_2017 7.20e-05 1.76e-05 4.10 0.00020 ***
## PopulationDensityperSqMile2010 2.45e-05 1.41e-04 0.17 0.86330
## DiabetesPercentage -8.36e-01 2.00e-01 -4.19 0.00015 ***
## Smokers_Percentage 3.32e-01 8.49e-02 3.91 0.00036 ***
## HeartDiseaseMortality -2.90e-02 9.27e-03 -3.13 0.00331 **
## StrokeMortality 1.61e-01 4.45e-02 3.62 0.00083 ***
## days 3.16e-03 1.46e-03 2.17 0.03613 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.136 on 39 degrees of freedom
## Multiple R-squared: 0.647, Adjusted R-squared: 0.475
## F-statistic: 3.76 on 19 and 39 DF, p-value: 0.000227
# Intermediate models, both starting from the large additive model.
# mod_k_3: additionally drops the case counts, omega, population density and
# the two population totals; replaces `days` by log(days); adds stay_at_home
# back in (it is subtracted earlier in the formula and re-added later, so it
# IS part of the model); and adds squared latitude / longitude terms.
# The doubled "+ +" before I(POP_LONGITUDE ^ 2) is parsed as a unary plus and
# does not change the model, but it is echoed in the Call of the summary.
mod_k_3 = lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma
- cases - deaths - omega - recovered - PopulationDensityperSqMile2010
+ log(days) - days
+ stay_at_home - PopulationEstimate2018 - PopTotalMale2017
+ I(POP_LATITUDE ^ 2) + + I(POP_LONGITUDE ^ 2),
data = data_trn)
# mod_k_4: large additive model without the case counts, omega and population
# density, with `days` replaced by log(days).
mod_k_4 = lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma
- cases - deaths - omega - recovered - PopulationDensityperSqMile2010 +
log(days) - days,
data = data_trn)
# Re-print the metrics of the earlier models next to the new ones for
# side-by-side comparison (test_mod() is defined outside this excerpt).
test_mod(mod_k_1, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 1.867e-01 2.328e-02 9.083e-01 4.534e-13 2.000e+00 5.830e-02 5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.221852 0.475015 0.016221 0.000191 20.000000 0.149099 14.731106
test_mod(mod_k_3, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.2054978 0.5247754 0.0017914 0.0001737 16.0000000 0.1552475 14.6026990
test_mod(mod_k_4, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.2042750 0.5311550 0.0009837 0.0004907 15.0000000 0.1546657 14.4151641
summary(mod_k_3)
##
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4742 -0.0548 0.0072 0.0526 0.2431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.13e+03 4.10e+02 2.75 0.0087 **
## lamda -6.79e+00 1.13e+00 -6.02 3.4e-07 ***
## c -3.12e-01 6.69e-02 -4.67 2.9e-05 ***
## alpha 3.83e+00 8.45e-01 4.54 4.5e-05 ***
## miu 4.58e+00 1.71e+00 2.69 0.0102 *
## POP_LATITUDE -2.11e+01 7.52e+00 -2.80 0.0076 **
## POP_LONGITUDE 1.51e+01 5.58e+00 2.70 0.0098 **
## PopulationEstimate_above65_2017 -1.64e-04 6.58e-05 -2.50 0.0163 *
## DiabetesPercentage -8.55e+00 3.03e+00 -2.82 0.0072 **
## Smokers_Percentage 4.53e+00 1.62e+00 2.80 0.0077 **
## HeartDiseaseMortality -8.73e-02 2.73e-02 -3.20 0.0026 **
## StrokeMortality 4.52e-01 1.34e-01 3.38 0.0016 **
## log(days) 1.02e-01 2.99e-02 3.41 0.0014 **
## stay_at_home 1.14e+00 4.42e-01 2.59 0.0129 *
## I(POP_LATITUDE^2) 2.65e-01 9.49e-02 2.79 0.0078 **
## I(POP_LONGITUDE^2) 7.85e-02 2.91e-02 2.70 0.0100 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.13 on 43 degrees of freedom
## Multiple R-squared: 0.648, Adjusted R-squared: 0.525
## F-statistic: 5.27 on 15 and 43 DF, p-value: 8.7e-06
summary(mod_k_4)
##
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4732 -0.0497 -0.0017 0.0607 0.2366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.3989580 3.5627641 4.32 8.7e-05 ***
## lamda -6.8317626 1.1179898 -6.11 2.3e-07 ***
## c -0.3221741 0.0646726 -4.98 1.0e-05 ***
## alpha 3.6975440 0.8116795 4.56 4.1e-05 ***
## miu 4.9379779 1.6006925 3.08 0.00351 **
## POP_LATITUDE -0.1920080 0.0440854 -4.36 7.8e-05 ***
## POP_LONGITUDE 0.0662907 0.0153167 4.33 8.5e-05 ***
## PopulationEstimate2018 -0.0001105 0.0000270 -4.09 0.00018 ***
## PopTotalMale2017 0.0002107 0.0000518 4.06 0.00020 ***
## PopulationEstimate_above65_2017 0.0000669 0.0000154 4.35 8.1e-05 ***
## DiabetesPercentage -0.7787347 0.1646073 -4.73 2.3e-05 ***
## Smokers_Percentage 0.3062246 0.0605033 5.06 7.9e-06 ***
## HeartDiseaseMortality -0.0257963 0.0058214 -4.43 6.1e-05 ***
## StrokeMortality 0.1439159 0.0288281 4.99 9.9e-06 ***
## log(days) 0.0993978 0.0294363 3.38 0.00154 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.129 on 44 degrees of freedom
## Multiple R-squared: 0.644, Adjusted R-squared: 0.531
## F-statistic: 5.69 on 14 and 44 DF, p-value: 4.16e-06
# Two further intermediate models that split the predictors of the large
# additive model into complementary groups (labelled "relatively bad" by the
# author):
# mod_k_5: everything in the large model EXCEPT the other SEIR parameters,
# i.e. case counts, state-level covariates and days.
mod_k_5 = lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma
- (lamda + c + alpha + omega + miu),
data = data_trn)
# mod_k_6: ONLY the other SEIR parameters as predictors.
mod_k_6 = lm(k ~ lamda + c + alpha + omega + miu,
data = data_trn)
# Re-print the metrics of the earlier models for side-by-side comparison
# (test_mod() is defined outside this excerpt).
test_mod(mod_k_1, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 1.867e-01 2.328e-02 9.083e-01 4.534e-13 2.000e+00 5.830e-02 5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.221852 0.475015 0.016221 0.000191 20.000000 0.149099 14.731106
test_mod(mod_k_3, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.2054978 0.5247754 0.0017914 0.0001737 16.0000000 0.1552475 14.6026990
test_mod(mod_k_4, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.2042750 0.5311550 0.0009837 0.0004907 15.0000000 0.1546657 14.4151641
test_mod(mod_k_5, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.004e-01 1.101e-01 2.567e-01 2.605e-12 1.500e+01 9.543e-02 7.160e+00
test_mod(mod_k_6, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.135e-01 8.328e-02 1.932e-02 7.076e-10 6.000e+00 5.237e-02 5.250e+00
summary(mod_k_5)
##
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (lamda +
## c + alpha + omega + miu), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9987 -0.0268 0.0054 0.0370 0.2963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.44e+00 4.21e+00 2.01 0.051 .
## cases 5.03e-07 1.92e-06 0.26 0.795
## deaths 1.45e-05 2.31e-05 0.63 0.533
## recovered -5.29e-06 7.18e-06 -0.74 0.465
## POP_LATITUDE -9.83e-02 5.46e-02 -1.80 0.079 .
## POP_LONGITUDE 3.81e-02 1.82e-02 2.10 0.042 *
## PopulationEstimate2018 -5.57e-05 2.94e-05 -1.90 0.064 .
## PopTotalMale2017 1.06e-04 5.64e-05 1.88 0.067 .
## PopulationEstimate_above65_2017 3.67e-05 2.00e-05 1.83 0.073 .
## PopulationDensityperSqMile2010 -2.84e-05 1.52e-04 -0.19 0.852
## DiabetesPercentage -4.58e-01 2.28e-01 -2.01 0.051 .
## Smokers_Percentage 1.93e-01 9.88e-02 1.96 0.057 .
## HeartDiseaseMortality -1.62e-02 1.09e-02 -1.48 0.147
## StrokeMortality 9.04e-02 5.25e-02 1.72 0.092 .
## days 2.01e-03 1.60e-03 1.25 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.177 on 44 degrees of freedom
## Multiple R-squared: 0.325, Adjusted R-squared: 0.11
## F-statistic: 1.51 on 14 and 44 DF, p-value: 0.147
summary(mod_k_6)
##
## Call:
## lm(formula = k ~ lamda + c + alpha + omega + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0289 -0.0445 0.0356 0.0886 0.2567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1002 0.0791 13.90 <2e-16 ***
## lamda -2.7175 1.2218 -2.22 0.030 *
## c -0.1608 0.0772 -2.08 0.042 *
## alpha 2.0142 1.1260 1.79 0.079 .
## omega -36.1277 24.4670 -1.48 0.146
## miu 0.8043 1.0704 0.75 0.456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.18 on 53 degrees of freedom
## Multiple R-squared: 0.162, Adjusted R-squared: 0.0833
## F-statistic: 2.05 on 5 and 53 DF, p-value: 0.0859
# Model retained for k: the SEIR-parameters-only fit.
# NOTE(review): the selection criterion is not stated. In the metrics printed
# above, mod_k_6 has the lowest test_rmse of the six candidates but a low
# adj_r2 -- confirm this is the intended trade-off.
mod_k = mod_k_6
# diagnostics(mod_k, testit = FALSE)
sigma
In this case, sigma
is set to an empirical value. Modeling excluded.
lamda
# Full additive model for lamda: regress lamda on every other column of
# data_trn, including the other fitted SEIR parameters (k among them). As for
# k, the fit is rank-deficient (see the singularity note and the predict()
# warnings in the output below).
# NOTE(review): test_mod() is defined outside this excerpt; `k = 3` presumably
# selects the response by column index (lamda is column 3) -- confirm.
mod_lamda_full = lm(lamda ~ ., data = data_trn)
test_mod(mod_lamda_full, k = 3)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02250 0.79955 0.17494 0.03059 29.00000 0.01670 26.37237
summary(mod_lamda_full)
##
## Call:
## lm(formula = lamda ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03239 -0.00625 -0.00057 0.00673 0.02290
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.64e+00 3.98e-01 4.13 0.00018 ***
## k -6.63e-02 1.23e-02 -5.41 3.5e-06 ***
## sigma NA NA NA NA
## c -4.39e-02 5.28e-03 -8.31 3.7e-10 ***
## alpha 3.96e-01 9.62e-02 4.11 0.00020 ***
## omega 4.98e-01 2.65e+00 0.19 0.85212
## miu 7.71e-01 1.75e-01 4.41 7.8e-05 ***
## cases -7.34e-08 1.66e-07 -0.44 0.66132
## deaths 4.80e-07 1.94e-06 0.25 0.80609
## recovered 1.10e-07 6.69e-07 0.16 0.86987
## POP_LATITUDE -2.08e-02 5.06e-03 -4.11 0.00020 ***
## POP_LONGITUDE 6.72e-03 1.74e-03 3.86 0.00042 ***
## PopulationEstimate2018 -1.17e-05 2.91e-06 -4.04 0.00025 ***
## PopTotalMale2017 2.23e-05 5.60e-06 3.98 0.00029 ***
## PopulationEstimate_above65_2017 7.26e-06 1.78e-06 4.08 0.00022 ***
## PopulationDensityperSqMile2010 2.75e-06 1.43e-05 0.19 0.84842
## DiabetesPercentage -7.94e-02 2.08e-02 -3.83 0.00046 ***
## Smokers_Percentage 3.01e-02 8.93e-03 3.37 0.00170 **
## HeartDiseaseMortality -2.66e-03 9.61e-04 -2.77 0.00854 **
## StrokeMortality 1.48e-02 4.65e-03 3.17 0.00292 **
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days 1.95e-04 1.53e-04 1.28 0.20967
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0138 on 39 degrees of freedom
## Multiple R-squared: 0.865, Adjusted R-squared: 0.8
## F-statistic: 13.2 on 19 and 39 DF, p-value: 1.57e-11
# Small additive model: lamda as a linear function of elapsed days only.
mod_lamda_1 = lm(lamda ~ days, data = data_trn)
# Large additive model: the full model minus the nine terms whose coefficients
# were not estimable there (hospital capacity, policy columns and sigma).
mod_lamda_2 = lm(lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
# NOTE(review): test_mod() is defined outside this excerpt.
test_mod(mod_lamda_1, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.031384 -0.003521 0.231445 0.727519 2.000000 0.024319 39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02250 0.79955 0.17494 0.03059 20.00000 0.01670 26.37237
summary(mod_lamda_1)
##
## Call:
## lm(formula = lamda ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0627 -0.0235 0.0031 0.0221 0.0679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.075259 0.009075 8.29 2.2e-11 ***
## days -0.000143 0.000160 -0.89 0.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0309 on 57 degrees of freedom
## Multiple R-squared: 0.0138, Adjusted R-squared: -0.00352
## F-statistic: 0.797 on 1 and 57 DF, p-value: 0.376
summary(mod_lamda_2)
##
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03239 -0.00625 -0.00057 0.00673 0.02290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.64e+00 3.98e-01 4.13 0.00018 ***
## k -6.63e-02 1.23e-02 -5.41 3.5e-06 ***
## c -4.39e-02 5.28e-03 -8.31 3.7e-10 ***
## alpha 3.96e-01 9.62e-02 4.11 0.00020 ***
## omega 4.98e-01 2.65e+00 0.19 0.85212
## miu 7.71e-01 1.75e-01 4.41 7.8e-05 ***
## cases -7.34e-08 1.66e-07 -0.44 0.66132
## deaths 4.80e-07 1.94e-06 0.25 0.80609
## recovered 1.10e-07 6.69e-07 0.16 0.86987
## POP_LATITUDE -2.08e-02 5.06e-03 -4.11 0.00020 ***
## POP_LONGITUDE 6.72e-03 1.74e-03 3.86 0.00042 ***
## PopulationEstimate2018 -1.17e-05 2.91e-06 -4.04 0.00025 ***
## PopTotalMale2017 2.23e-05 5.60e-06 3.98 0.00029 ***
## PopulationEstimate_above65_2017 7.26e-06 1.78e-06 4.08 0.00022 ***
## PopulationDensityperSqMile2010 2.75e-06 1.43e-05 0.19 0.84842
## DiabetesPercentage -7.94e-02 2.08e-02 -3.83 0.00046 ***
## Smokers_Percentage 3.01e-02 8.93e-03 3.37 0.00170 **
## HeartDiseaseMortality -2.66e-03 9.61e-04 -2.77 0.00854 **
## StrokeMortality 1.48e-02 4.65e-03 3.17 0.00292 **
## days 1.95e-04 1.53e-04 1.28 0.20967
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0138 on 39 degrees of freedom
## Multiple R-squared: 0.865, Adjusted R-squared: 0.8
## F-statistic: 13.2 on 19 and 39 DF, p-value: 1.57e-11
# Intermediate models for lamda. Both start from the large additive model
# (hospital/policy columns and sigma excluded) and then prune or transform.

# mod_lamda_3:
#   * drops cases, deaths, omega, recovered and PopulationDensityperSqMile2010,
#     none of which is significant in the large additive model;
#   * replaces days by log(days);
#   * adds stay_at_home back (it is removed above with the other policy
#     columns; re-adding it here places it after log(days) in the fit);
#   * drops PopulationEstimate2018 and PopTotalMale2017;
#   * adds quadratic terms in latitude and longitude.
mod_lamda_3 = lm(
  formula = lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE^2) + I(POP_LONGITUDE^2),
  data = data_trn
)

# mod_lamda_4: same pruning of cases, deaths, omega, recovered and population
# density, with days replaced by log(days); the population totals are kept and
# no policy or quadratic terms are added.
mod_lamda_4 = lm(
  formula = lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

# Re-print the baseline metrics so all candidates can be compared side by side.
test_mod(mod_lamda_1, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.031384 -0.003521 0.231445 0.727519 2.000000 0.024319 39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02250 0.79955 0.17494 0.03059 20.00000 0.01670 26.37237
test_mod(mod_lamda_3, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01787 0.82455 0.20647 0.01453 16.00000 0.01624 24.42386
test_mod(mod_lamda_4, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01745 0.82845 0.19372 0.01659 15.00000 0.01620 24.31619
summary(mod_lamda_3)
##
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03394 -0.00630 -0.00201 0.00661 0.02268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.38e+02 3.90e+01 3.53 0.00099 ***
## k -6.74e-02 1.12e-02 -6.02 3.4e-07 ***
## c -4.40e-02 4.67e-03 -9.43 4.9e-12 ***
## alpha 4.21e-01 7.96e-02 5.29 3.9e-06 ***
## miu 7.92e-01 1.38e-01 5.73 9.2e-07 ***
## POP_LATITUDE -2.57e+00 7.15e-01 -3.59 0.00084 ***
## POP_LONGITUDE 1.85e+00 5.31e-01 3.49 0.00115 **
## PopulationEstimate_above65_2017 -2.07e-05 6.27e-06 -3.29 0.00199 **
## DiabetesPercentage -1.04e+00 2.88e-01 -3.59 0.00084 ***
## Smokers_Percentage 5.50e-01 1.54e-01 3.57 0.00090 ***
## HeartDiseaseMortality -1.02e-02 2.59e-03 -3.93 0.00031 ***
## StrokeMortality 5.21e-02 1.27e-02 4.09 0.00018 ***
## log(days) 7.81e-03 3.14e-03 2.49 0.01682 *
## stay_at_home 1.42e-01 4.21e-02 3.37 0.00158 **
## I(POP_LATITUDE^2) 3.22e-02 9.02e-03 3.58 0.00088 ***
## I(POP_LONGITUDE^2) 9.63e-03 2.77e-03 3.48 0.00117 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0129 on 43 degrees of freedom
## Multiple R-squared: 0.87, Adjusted R-squared: 0.825
## F-statistic: 19.2 on 15 and 43 DF, p-value: 2.6e-14
summary(mod_lamda_4)
##
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03373 -0.00652 -0.00214 0.00673 0.02291
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.63e+00 3.43e-01 4.74 2.3e-05 ***
## k -6.72e-02 1.10e-02 -6.11 2.3e-07 ***
## c -4.42e-02 4.46e-03 -9.92 8.6e-13 ***
## alpha 4.18e-01 7.46e-02 5.59 1.3e-06 ***
## miu 8.00e-01 1.27e-01 6.30 1.2e-07 ***
## POP_LATITUDE -2.07e-02 4.19e-03 -4.95 1.1e-05 ***
## POP_LONGITUDE 6.72e-03 1.50e-03 4.46 5.5e-05 ***
## PopulationEstimate2018 -1.16e-05 2.61e-06 -4.46 5.6e-05 ***
## PopTotalMale2017 2.21e-05 5.02e-06 4.41 6.6e-05 ***
## PopulationEstimate_above65_2017 7.13e-06 1.47e-06 4.84 1.6e-05 ***
## DiabetesPercentage -7.93e-02 1.61e-02 -4.93 1.2e-05 ***
## Smokers_Percentage 2.99e-02 6.06e-03 4.93 1.2e-05 ***
## HeartDiseaseMortality -2.60e-03 5.73e-04 -4.53 4.5e-05 ***
## StrokeMortality 1.45e-02 2.83e-03 5.13 6.3e-06 ***
## log(days) 7.73e-03 3.06e-03 2.53 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0128 on 44 degrees of freedom
## Multiple R-squared: 0.87, Adjusted R-squared: 0.828
## F-statistic: 21 on 14 and 44 DF, p-value: 5.59e-15
# Two restricted comparison models (labelled "relatively bad" by the author).

# mod_lamda_5: the large additive model with the parameter columns
# k, c, alpha, omega and miu removed as a group.
mod_lamda_5 = lm(
  formula = lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - (k + c + alpha + omega + miu),
  data = data_trn
)

# mod_lamda_6: the complement -- only k, c, alpha, omega and miu as predictors.
mod_lamda_6 = lm(
  formula = lamda ~ k + c + alpha + omega + miu,
  data = data_trn
)

# Re-print the baseline metrics ahead of the full comparison.
test_mod(mod_lamda_1, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.031384 -0.003521 0.231445 0.727519 2.000000 0.024319 39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02250 0.79955 0.17494 0.03059 20.00000 0.01670 26.37237
test_mod(mod_lamda_3, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01787 0.82455 0.20647 0.01453 16.00000 0.01624 24.42386
test_mod(mod_lamda_4, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01745 0.82845 0.19372 0.01659 15.00000 0.01620 24.31619
test_mod(mod_lamda_5, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.03121 0.22337 0.22277 0.23216 15.00000 0.02704 44.73045
test_mod(mod_lamda_6, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02534 0.60579 0.10363 0.07028 6.00000 0.01508 18.16319
summary(mod_lamda_5)
##
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (k + c +
## alpha + omega + miu), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03981 -0.01853 -0.00046 0.01614 0.05226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.57e-01 6.44e-01 0.24 0.81
## cases -4.08e-08 2.94e-07 -0.14 0.89
## deaths -4.05e-06 3.53e-06 -1.15 0.26
## recovered 1.29e-06 1.10e-06 1.18 0.25
## POP_LATITUDE -3.72e-03 8.36e-03 -0.45 0.66
## POP_LONGITUDE 1.75e-04 2.78e-03 0.06 0.95
## PopulationEstimate2018 2.38e-06 4.50e-06 0.53 0.60
## PopTotalMale2017 -5.04e-06 8.63e-06 -0.58 0.56
## PopulationEstimate_above65_2017 1.05e-06 3.06e-06 0.34 0.73
## PopulationDensityperSqMile2010 3.05e-05 2.32e-05 1.31 0.20
## DiabetesPercentage -1.83e-02 3.49e-02 -0.52 0.60
## Smokers_Percentage 1.44e-02 1.51e-02 0.95 0.35
## HeartDiseaseMortality -1.85e-03 1.68e-03 -1.11 0.27
## StrokeMortality 8.20e-03 8.05e-03 1.02 0.31
## days -2.56e-04 2.45e-04 -1.05 0.30
##
## Residual standard error: 0.0272 on 44 degrees of freedom
## Multiple R-squared: 0.411, Adjusted R-squared: 0.223
## F-statistic: 2.19 on 14 and 44 DF, p-value: 0.0242
summary(mod_lamda_6)
##
## Call:
## lm(formula = lamda ~ k + c + alpha + omega + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05354 -0.01051 0.00503 0.01083 0.03496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08436 0.01422 5.93 2.3e-07 ***
## k -0.03141 0.01412 -2.22 0.03042 *
## c -0.04153 0.00648 -6.41 3.9e-08 ***
## alpha 0.42203 0.11036 3.82 0.00035 ***
## omega -2.19340 2.66718 -0.82 0.41455
## miu 0.55378 0.08718 6.35 5.0e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0194 on 53 degrees of freedom
## Multiple R-squared: 0.64, Adjusted R-squared: 0.606
## F-statistic: 18.8 on 5 and 53 DF, p-value: 1.03e-10
# Model carried forward for lamda. Among the six candidates compared above,
# mod_lamda_4 has the lowest loocv_rmse and the highest adj_r2.
mod_lamda = mod_lamda_4
# Diagnostics call is left disabled:
# diagnostics(mod_lamda, testit = FALSE)
c
# Full additive model for c: regress on every other column of data_trn.
# The fit is rank-deficient (some coefficients are not estimable), which is
# why the test_mod() call below raises predict.lm's rank-deficiency warning.
mod_c_full = lm(formula = c ~ ., data = data_trn)

# NOTE(review): k presumably is the position of the response column in
# data_trn (c looks like the 4th column) -- confirm against test_mod().
test_mod(mod_c_full, k = 4)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3844 0.6300 0.1073 0.8302 29.0000 0.2652 Inf
summary(mod_c_full)
##
## Call:
## lm(formula = c ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4762 -0.1244 -0.0493 0.1559 0.4969
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.48e+01 7.73e+00 3.22 0.00262 **
## k -1.04e+00 2.44e-01 -4.26 0.00012 ***
## sigma NA NA NA NA
## lamda -1.46e+01 1.75e+00 -8.31 3.7e-10 ***
## alpha 4.62e+00 1.96e+00 2.35 0.02379 *
## omega 1.76e+01 4.82e+01 0.37 0.71646
## miu 1.51e+01 3.05e+00 4.96 1.4e-05 ***
## cases -2.66e-06 3.00e-06 -0.89 0.38107
## deaths 5.05e-05 3.45e-05 1.46 0.15120
## recovered -4.43e-06 1.22e-05 -0.36 0.71804
## POP_LATITUDE -2.96e-01 9.95e-02 -2.97 0.00508 **
## POP_LONGITUDE 1.03e-01 3.35e-02 3.06 0.00395 **
## PopulationEstimate2018 -2.06e-04 5.37e-05 -3.83 0.00045 ***
## PopTotalMale2017 3.94e-04 1.03e-04 3.82 0.00047 ***
## PopulationEstimate_above65_2017 1.12e-04 3.43e-05 3.28 0.00220 **
## PopulationDensityperSqMile2010 -2.93e-04 2.56e-04 -1.14 0.25923
## DiabetesPercentage -1.07e+00 4.09e-01 -2.62 0.01258 *
## Smokers_Percentage 3.28e-01 1.77e-01 1.85 0.07137 .
## HeartDiseaseMortality -2.41e-02 1.87e-02 -1.29 0.20631
## StrokeMortality 1.59e-01 9.14e-02 1.74 0.09055 .
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days 4.81e-03 2.74e-03 1.76 0.08674 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.251 on 39 degrees of freedom
## Multiple R-squared: 0.751, Adjusted R-squared: 0.63
## F-statistic: 6.2 on 19 and 39 DF, p-value: 7.93e-07
# Baseline: c explained by the `days` column alone.
mod_c_1 = lm(formula = c ~ days, data = data_trn)

# Large additive model: the full model minus the nine terms that the full fit
# reports as NA (sigma plus the hospital/policy columns). The estimable
# coefficients are unchanged, but the model is no longer rank-deficient.
mod_c_2 = lm(
  formula = c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma,
  data = data_trn
)

# Evaluation metrics for the baseline.
test_mod(mod_c_1, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 4.204e-01 -6.359e-04 8.737e-01 1.839e-07 2.000e+00 4.396e-01 Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3844 0.6300 0.1073 0.8302 20.0000 0.2652 Inf
summary(mod_c_1)
##
## Call:
## lm(formula = c ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.434 -0.295 -0.198 0.450 0.734
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.23630 0.12145 1.95 0.057 .
## days 0.00210 0.00214 0.98 0.331
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.414 on 57 degrees of freedom
## Multiple R-squared: 0.0166, Adjusted R-squared: -0.000636
## F-statistic: 0.963 on 1 and 57 DF, p-value: 0.331
summary(mod_c_2)
##
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4762 -0.1244 -0.0493 0.1559 0.4969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.48e+01 7.73e+00 3.22 0.00262 **
## k -1.04e+00 2.44e-01 -4.26 0.00012 ***
## lamda -1.46e+01 1.75e+00 -8.31 3.7e-10 ***
## alpha 4.62e+00 1.96e+00 2.35 0.02379 *
## omega 1.76e+01 4.82e+01 0.37 0.71646
## miu 1.51e+01 3.05e+00 4.96 1.4e-05 ***
## cases -2.66e-06 3.00e-06 -0.89 0.38107
## deaths 5.05e-05 3.45e-05 1.46 0.15120
## recovered -4.43e-06 1.22e-05 -0.36 0.71804
## POP_LATITUDE -2.96e-01 9.95e-02 -2.97 0.00508 **
## POP_LONGITUDE 1.03e-01 3.35e-02 3.06 0.00395 **
## PopulationEstimate2018 -2.06e-04 5.37e-05 -3.83 0.00045 ***
## PopTotalMale2017 3.94e-04 1.03e-04 3.82 0.00047 ***
## PopulationEstimate_above65_2017 1.12e-04 3.43e-05 3.28 0.00220 **
## PopulationDensityperSqMile2010 -2.93e-04 2.56e-04 -1.14 0.25923
## DiabetesPercentage -1.07e+00 4.09e-01 -2.62 0.01258 *
## Smokers_Percentage 3.28e-01 1.77e-01 1.85 0.07137 .
## HeartDiseaseMortality -2.41e-02 1.87e-02 -1.29 0.20631
## StrokeMortality 1.59e-01 9.14e-02 1.74 0.09055 .
## days 4.81e-03 2.74e-03 1.76 0.08674 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.251 on 39 degrees of freedom
## Multiple R-squared: 0.751, Adjusted R-squared: 0.63
## F-statistic: 6.2 on 19 and 39 DF, p-value: 7.93e-07
# Intermediate models for c. Both start from the large additive model
# (hospital/policy columns and sigma excluded) and then prune or transform.

# mod_c_3:
#   * drops cases, deaths, omega, recovered and PopulationDensityperSqMile2010,
#     none of which is significant in the large additive model;
#   * replaces days by log(days);
#   * adds stay_at_home back (it is removed above with the other policy
#     columns; re-adding it here places it after log(days) in the fit);
#   * drops PopulationEstimate2018 and PopTotalMale2017;
#   * adds quadratic terms in latitude and longitude.
mod_c_3 = lm(
  formula = c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE^2) + I(POP_LONGITUDE^2),
  data = data_trn
)

# mod_c_4: same pruning of cases, deaths, omega, recovered and population
# density, with days replaced by log(days); the population totals are kept and
# no policy or quadratic terms are added.
mod_c_4 = lm(
  formula = c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

# Re-print the baseline metrics so all candidates can be compared side by side.
test_mod(mod_c_1, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 4.204e-01 -6.359e-04 8.737e-01 1.839e-07 2.000e+00 4.396e-01 Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3844 0.6300 0.1073 0.8302 20.0000 0.2652 Inf
test_mod(mod_c_3, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3285 0.6605 0.2078 0.1439 16.0000 0.2592 Inf
test_mod(mod_c_4, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3132 0.6627 0.2233 0.1210 15.0000 0.2645 Inf
summary(mod_c_3)
##
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5448 -0.1371 -0.0494 0.1446 0.4962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.85e+03 7.01e+02 4.07 0.00020 ***
## k -1.08e+00 2.31e-01 -4.67 2.9e-05 ***
## lamda -1.53e+01 1.62e+00 -9.43 4.9e-12 ***
## alpha 5.92e+00 1.68e+00 3.52 0.00103 **
## miu 1.52e+01 2.52e+00 6.01 3.5e-07 ***
## POP_LATITUDE -5.29e+01 1.29e+01 -4.10 0.00018 ***
## POP_LONGITUDE 3.86e+01 9.54e+00 4.04 0.00021 ***
## PopulationEstimate_above65_2017 -4.41e-04 1.12e-04 -3.93 0.00030 ***
## DiabetesPercentage -2.13e+01 5.20e+00 -4.10 0.00018 ***
## Smokers_Percentage 1.14e+01 2.78e+00 4.09 0.00019 ***
## HeartDiseaseMortality -2.00e-01 4.74e-02 -4.22 0.00013 ***
## StrokeMortality 1.01e+00 2.33e-01 4.33 8.7e-05 ***
## log(days) 1.40e-01 5.88e-02 2.39 0.02155 *
## stay_at_home 3.00e+00 7.54e-01 3.97 0.00027 ***
## I(POP_LATITUDE^2) 6.65e-01 1.62e-01 4.09 0.00018 ***
## I(POP_LONGITUDE^2) 2.01e-01 4.97e-02 4.04 0.00022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.241 on 43 degrees of freedom
## Multiple R-squared: 0.748, Adjusted R-squared: 0.661
## F-statistic: 8.52 on 15 and 43 DF, p-value: 1.51e-08
summary(mod_c_4)
##
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5776 -0.1298 -0.0518 0.1315 0.5565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.83e+01 6.68e+00 4.23 0.00012 ***
## k -1.12e+00 2.25e-01 -4.98 1.0e-05 ***
## lamda -1.56e+01 1.58e+00 -9.92 8.6e-13 ***
## alpha 6.45e+00 1.56e+00 4.14 0.00015 ***
## miu 1.46e+01 2.44e+00 6.00 3.3e-07 ***
## POP_LATITUDE -3.61e-01 8.18e-02 -4.42 6.4e-05 ***
## POP_LONGITUDE 1.15e-01 2.93e-02 3.93 0.00029 ***
## PopulationEstimate2018 -2.08e-04 5.01e-05 -4.15 0.00015 ***
## PopTotalMale2017 3.96e-04 9.64e-05 4.10 0.00017 ***
## PopulationEstimate_above65_2017 1.25e-04 2.86e-05 4.39 7.1e-05 ***
## DiabetesPercentage -1.36e+00 3.16e-01 -4.31 9.2e-05 ***
## Smokers_Percentage 4.92e-01 1.21e-01 4.07 0.00019 ***
## HeartDiseaseMortality -4.29e-02 1.13e-02 -3.78 0.00047 ***
## StrokeMortality 2.43e-01 5.64e-02 4.32 8.8e-05 ***
## log(days) 1.52e-01 5.72e-02 2.65 0.01113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.24 on 44 degrees of freedom
## Multiple R-squared: 0.744, Adjusted R-squared: 0.663
## F-statistic: 9.14 on 14 and 44 DF, p-value: 6.67e-09
# Two restricted comparison models (labelled "relatively bad" by the author).

# mod_c_5: the large additive model with the parameter columns
# k, lamda, alpha, omega and miu removed as a group.
mod_c_5 = lm(
  formula = c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - (k + lamda + alpha + omega + miu),
  data = data_trn
)

# mod_c_6: the complement -- only k, lamda, alpha, omega and miu as predictors.
mod_c_6 = lm(
  formula = c ~ k + lamda + alpha + omega + miu,
  data = data_trn
)

# Re-print the baseline metrics ahead of the full comparison.
test_mod(mod_c_1, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 4.204e-01 -6.359e-04 8.737e-01 1.839e-07 2.000e+00 4.396e-01 Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3844 0.6300 0.1073 0.8302 20.0000 0.2652 Inf
test_mod(mod_c_3, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3285 0.6605 0.2078 0.1439 16.0000 0.2592 Inf
test_mod(mod_c_4, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3132 0.6627 0.2233 0.1210 15.0000 0.2645 Inf
test_mod(mod_c_5, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.511125 -0.046239 0.065749 0.007058 15.000000 0.448106 Inf
test_mod(mod_c_6, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.359923 0.444061 0.003014 0.128642 6.000000 0.266542 Inf
summary(mod_c_5)
##
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (k + lamda +
## alpha + omega + miu), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5860 -0.2928 -0.0346 0.2441 0.6879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.59e+00 1.00e+01 -0.86 0.396
## cases -9.08e-06 4.58e-06 -1.98 0.054 .
## deaths 1.07e-04 5.50e-05 1.95 0.057 .
## recovered 8.43e-06 1.71e-05 0.49 0.624
## POP_LATITUDE 1.40e-01 1.30e-01 1.08 0.287
## POP_LONGITUDE -3.45e-02 4.33e-02 -0.80 0.430
## PopulationEstimate2018 -2.53e-06 6.99e-05 -0.04 0.971
## PopTotalMale2017 7.75e-06 1.34e-04 0.06 0.954
## PopulationEstimate_above65_2017 -1.82e-05 4.76e-05 -0.38 0.704
## PopulationDensityperSqMile2010 -5.00e-04 3.61e-04 -1.38 0.173
## DiabetesPercentage 5.57e-01 5.43e-01 1.03 0.310
## Smokers_Percentage -3.40e-01 2.35e-01 -1.45 0.155
## HeartDiseaseMortality 3.89e-02 2.61e-02 1.49 0.143
## StrokeMortality -1.52e-01 1.25e-01 -1.22 0.231
## days 3.81e-03 3.81e-03 1.00 0.323
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.423 on 44 degrees of freedom
## Multiple R-squared: 0.206, Adjusted R-squared: -0.0462
## F-statistic: 0.817 on 14 and 44 DF, p-value: 0.647
summary(mod_c_6)
##
## Call:
## lm(formula = c ~ k + lamda + alpha + omega + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8087 -0.1702 -0.0649 0.1658 0.5454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.146 0.246 4.66 2.2e-05 ***
## k -0.471 0.226 -2.08 0.042 *
## lamda -10.522 1.640 -6.41 3.9e-08 ***
## alpha 4.111 1.902 2.16 0.035 *
## omega -36.160 42.431 -0.85 0.398
## miu 7.844 1.493 5.25 2.7e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.308 on 53 degrees of freedom
## Multiple R-squared: 0.492, Adjusted R-squared: 0.444
## F-statistic: 10.3 on 5 and 53 DF, p-value: 6.44e-07
# Model carried forward for c.
# NOTE(review): mod_c_3 has the lowest test_rmse of the six candidates above,
# while mod_c_4 is slightly better on loocv_rmse and adj_r2 -- presumably
# test_rmse drove this choice; confirm.
mod_c = mod_c_3
# Diagnostics call is left disabled:
# diagnostics(mod_c, testit = FALSE)
alpha
# Full additive model for alpha: regress on every other column of data_trn.
# The fit is rank-deficient (some coefficients are not estimable), which is
# why the test_mod() call below raises predict.lm's rank-deficiency warning.
mod_alpha_full = lm(formula = alpha ~ ., data = data_trn)

# NOTE(review): k presumably is the position of the response column in
# data_trn (alpha looks like the 5th column) -- confirm against test_mod().
test_mod(mod_alpha_full, k = 5)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02896 0.51252 0.02816 0.05511 29.00000 0.02461 Inf
summary(mod_alpha_full)
##
## Call:
## lm(formula = alpha ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03333 -0.00934 0.00033 0.00861 0.04447
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.47e+00 6.20e-01 -2.36 0.0231 *
## k 6.10e-02 2.03e-02 3.00 0.0046 **
## sigma NA NA NA NA
## lamda 7.64e-01 1.86e-01 4.11 0.0002 ***
## c 2.69e-02 1.14e-02 2.35 0.0238 *
## omega 8.14e+00 3.45e+00 2.36 0.0234 *
## miu -2.87e-01 2.94e-01 -0.98 0.3354
## cases 1.56e-07 2.30e-07 0.68 0.5017
## deaths 2.61e-06 2.67e-06 0.98 0.3351
## recovered -1.42e-06 9.02e-07 -1.57 0.1238
## POP_LATITUDE 2.06e-02 7.74e-03 2.66 0.0114 *
## POP_LONGITUDE -5.75e-03 2.69e-03 -2.14 0.0389 *
## PopulationEstimate2018 9.50e-06 4.56e-06 2.08 0.0439 *
## PopTotalMale2017 -1.79e-05 8.77e-06 -2.04 0.0483 *
## PopulationEstimate_above65_2017 -6.43e-06 2.77e-06 -2.32 0.0256 *
## PopulationDensityperSqMile2010 -4.30e-05 1.86e-05 -2.31 0.0265 *
## DiabetesPercentage 8.44e-02 3.10e-02 2.72 0.0097 **
## Smokers_Percentage -3.85e-02 1.27e-02 -3.03 0.0043 **
## HeartDiseaseMortality 3.77e-03 1.33e-03 2.83 0.0072 **
## StrokeMortality -1.93e-02 6.55e-03 -2.94 0.0054 **
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days 2.00e-05 2.17e-04 0.09 0.9272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0192 on 39 degrees of freedom
## Multiple R-squared: 0.672, Adjusted R-squared: 0.513
## F-statistic: 4.21 on 19 and 39 DF, p-value: 0.0000719
# Baseline: alpha explained by the `days` column alone.
mod_alpha_1 = lm(formula = alpha ~ days, data = data_trn)

# Large additive model: the full model minus the nine terms that the full fit
# reports as NA (sigma plus the hospital/policy columns). The estimable
# coefficients are unchanged, but the model is no longer rank-deficient.
mod_alpha_2 = lm(
  formula = alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma,
  data = data_trn
)

# Evaluation metrics for the baseline.
test_mod(mod_alpha_1, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.751e-02 4.329e-02 6.639e-02 1.278e-09 2.000e+00 2.533e-02 Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02896 0.51252 0.02816 0.05511 20.00000 0.02461 Inf
summary(mod_alpha_1)
##
## Call:
## lm(formula = alpha ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.02535 -0.01478 -0.00667 0.00167 0.12315
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029069 0.007897 3.68 0.00052 ***
## days -0.000265 0.000139 -1.90 0.06200 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0269 on 57 degrees of freedom
## Multiple R-squared: 0.0598, Adjusted R-squared: 0.0433
## F-statistic: 3.62 on 1 and 57 DF, p-value: 0.062
summary(mod_alpha_2)
##
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03333 -0.00934 0.00033 0.00861 0.04447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.47e+00 6.20e-01 -2.36 0.0231 *
## k 6.10e-02 2.03e-02 3.00 0.0046 **
## lamda 7.64e-01 1.86e-01 4.11 0.0002 ***
## c 2.69e-02 1.14e-02 2.35 0.0238 *
## omega 8.14e+00 3.45e+00 2.36 0.0234 *
## miu -2.87e-01 2.94e-01 -0.98 0.3354
## cases 1.56e-07 2.30e-07 0.68 0.5017
## deaths 2.61e-06 2.67e-06 0.98 0.3351
## recovered -1.42e-06 9.02e-07 -1.57 0.1238
## POP_LATITUDE 2.06e-02 7.74e-03 2.66 0.0114 *
## POP_LONGITUDE -5.75e-03 2.69e-03 -2.14 0.0389 *
## PopulationEstimate2018 9.50e-06 4.56e-06 2.08 0.0439 *
## PopTotalMale2017 -1.79e-05 8.77e-06 -2.04 0.0483 *
## PopulationEstimate_above65_2017 -6.43e-06 2.77e-06 -2.32 0.0256 *
## PopulationDensityperSqMile2010 -4.30e-05 1.86e-05 -2.31 0.0265 *
## DiabetesPercentage 8.44e-02 3.10e-02 2.72 0.0097 **
## Smokers_Percentage -3.85e-02 1.27e-02 -3.03 0.0043 **
## HeartDiseaseMortality 3.77e-03 1.33e-03 2.83 0.0072 **
## StrokeMortality -1.93e-02 6.55e-03 -2.94 0.0054 **
## days 2.00e-05 2.17e-04 0.09 0.9272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0192 on 39 degrees of freedom
## Multiple R-squared: 0.672, Adjusted R-squared: 0.513
## F-statistic: 4.21 on 19 and 39 DF, p-value: 0.0000719
# Intermediate models for alpha. Both start from the large additive model
# (hospital/policy columns and sigma excluded) and reuse the same pruning
# recipe as the intermediate models for the other parameters.

# mod_alpha_3:
#   * drops cases, deaths, omega, recovered and PopulationDensityperSqMile2010;
#   * replaces days by log(days);
#   * adds stay_at_home back (it is removed above with the other policy
#     columns; re-adding it here places it after log(days) in the fit);
#   * drops PopulationEstimate2018 and PopTotalMale2017;
#   * adds quadratic terms in latitude and longitude.
mod_alpha_3 = lm(
  formula = alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE^2) + I(POP_LONGITUDE^2),
  data = data_trn
)

# mod_alpha_4: same pruning of cases, deaths, omega, recovered and population
# density, with days replaced by log(days); the population totals are kept and
# no policy or quadratic terms are added.
mod_alpha_4 = lm(
  formula = alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym
    - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

# Re-print the baseline metrics so all candidates can be compared side by side.
test_mod(mod_alpha_1, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.751e-02 4.329e-02 6.639e-02 1.278e-09 2.000e+00 2.533e-02 Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02896 0.51252 0.02816 0.05511 20.00000 0.02461 Inf
test_mod(mod_alpha_3, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.025490 0.509506 0.008626 0.008274 16.000000 0.022995 Inf
test_mod(mod_alpha_4, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.025671 0.485239 0.007565 0.019305 15.000000 0.023262 Inf
summary(mod_alpha_3)
##
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03054 -0.01042 0.00047 0.00781 0.06194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.77e+01 6.42e+01 -1.52 0.1354
## k 8.45e-02 1.86e-02 4.54 4.5e-05 ***
## lamda 9.36e-01 1.77e-01 5.29 3.9e-06 ***
## c 3.78e-02 1.07e-02 3.52 0.0010 **
## miu -5.21e-01 2.62e-01 -1.99 0.0530 .
## POP_LATITUDE 1.87e+00 1.18e+00 1.58 0.1210
## POP_LONGITUDE -1.29e+00 8.74e-01 -1.47 0.1481
## PopulationEstimate_above65_2017 1.34e-05 1.03e-05 1.31 0.1985
## DiabetesPercentage 7.53e-01 4.76e-01 1.58 0.1210
## Smokers_Percentage -3.95e-01 2.55e-01 -1.55 0.1284
## HeartDiseaseMortality 8.19e-03 4.33e-03 1.89 0.0653 .
## StrokeMortality -4.47e-02 2.13e-02 -2.10 0.0417 *
## log(days) -1.69e-02 4.29e-03 -3.93 0.0003 ***
## stay_at_home -9.37e-02 6.90e-02 -1.36 0.1817
## I(POP_LATITUDE^2) -2.34e-02 1.49e-02 -1.57 0.1238
## I(POP_LONGITUDE^2) -6.68e-03 4.56e-03 -1.47 0.1499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0193 on 43 degrees of freedom
## Multiple R-squared: 0.636, Adjusted R-squared: 0.51
## F-statistic: 5.02 on 15 and 43 DF, p-value: 0.0000154
summary(mod_alpha_4)
##
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03305 -0.01194 0.00035 0.00802 0.05910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.93e+00 5.82e-01 -3.32 0.00180 **
## k 8.67e-02 1.90e-02 4.56 4.1e-05 ***
## lamda 9.95e-01 1.78e-01 5.59 1.3e-06 ***
## c 4.35e-02 1.05e-02 4.14 0.00015 ***
## miu -6.93e-01 2.49e-01 -2.78 0.00794 **
## POP_LATITUDE 2.47e-02 7.17e-03 3.44 0.00127 **
## POP_LONGITUDE -8.54e-03 2.49e-03 -3.43 0.00131 **
## PopulationEstimate2018 1.49e-05 4.30e-06 3.46 0.00121 **
## PopTotalMale2017 -2.85e-05 8.26e-06 -3.45 0.00125 **
## PopulationEstimate_above65_2017 -8.49e-06 2.51e-06 -3.39 0.00150 **
## DiabetesPercentage 1.01e-01 2.69e-02 3.77 0.00048 ***
## Smokers_Percentage -3.85e-02 1.01e-02 -3.81 0.00043 ***
## HeartDiseaseMortality 3.11e-03 9.64e-04 3.22 0.00239 **
## StrokeMortality -1.83e-02 4.79e-03 -3.81 0.00042 ***
## log(days) -1.66e-02 4.39e-03 -3.78 0.00047 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0197 on 44 degrees of freedom
## Multiple R-squared: 0.609, Adjusted R-squared: 0.485
## F-statistic: 4.91 on 14 and 44 DF, p-value: 0.0000239
# Two comparison models for alpha (labelled "relatively bad" in the notes).
#
# mod_alpha_5: leaves out the other SEIR parameters (k, lamda, c, omega, miu)
# and keeps the case counts, the demographic/health columns and days.
mod_alpha_5 = lm(
  alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + omega + miu),
  data = data_trn
)
# mod_alpha_6: the complement, using only the other SEIR parameters.
mod_alpha_6 = lm(alpha ~ k + lamda + c + omega + miu, data = data_trn)
# Evaluate every alpha candidate again, starting from mod_alpha_1, so the
# metrics of all six models can be read side by side.
test_mod(mod_alpha_1, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.751e-02 4.329e-02 6.639e-02 1.278e-09 2.000e+00 2.533e-02 Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02896 0.51252 0.02816 0.05511 20.00000 0.02461 Inf
test_mod(mod_alpha_3, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.025490 0.509506 0.008626 0.008274 16.000000 0.022995 Inf
test_mod(mod_alpha_4, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.025671 0.485239 0.007565 0.019305 15.000000 0.023262 Inf
test_mod(mod_alpha_5, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 3.376e-02 1.156e-01 2.723e-02 1.127e-05 1.500e+01 2.189e-02 Inf
test_mod(mod_alpha_6, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.02497976 0.39740134 0.00033377 0.00004824 6.00000000 0.01597093 Inf
summary(mod_alpha_5)
##
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (k + lamda +
## c + omega + miu), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.04609 -0.01070 -0.00096 0.00612 0.10069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.15e+00 6.13e-01 -1.88 0.067 .
## cases -3.10e-08 2.80e-07 -0.11 0.912
## deaths 2.89e-06 3.36e-06 0.86 0.394
## recovered -7.14e-07 1.05e-06 -0.68 0.498
## POP_LATITUDE 1.68e-02 7.95e-03 2.11 0.041 *
## POP_LONGITUDE -4.95e-03 2.65e-03 -1.87 0.068 .
## PopulationEstimate2018 7.92e-06 4.28e-06 1.85 0.071 .
## PopTotalMale2017 -1.50e-05 8.21e-06 -1.83 0.074 .
## PopulationEstimate_above65_2017 -4.55e-06 2.91e-06 -1.56 0.126
## PopulationDensityperSqMile2010 -4.13e-05 2.21e-05 -1.87 0.068 .
## DiabetesPercentage 7.06e-02 3.32e-02 2.13 0.039 *
## Smokers_Percentage -3.17e-02 1.44e-02 -2.20 0.033 *
## HeartDiseaseMortality 3.14e-03 1.59e-03 1.97 0.056 .
## StrokeMortality -1.54e-02 7.65e-03 -2.02 0.050 *
## days -1.80e-04 2.33e-04 -0.77 0.443
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0259 on 44 degrees of freedom
## Multiple R-squared: 0.329, Adjusted R-squared: 0.116
## F-statistic: 1.54 on 14 and 44 DF, p-value: 0.136
summary(mod_alpha_6)
##
## Call:
## lm(formula = alpha ~ k + lamda + c + omega + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03314 -0.00985 -0.00122 0.00752 0.07859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05160 0.01893 -2.73 0.00866 **
## k 0.02827 0.01580 1.79 0.07935 .
## lamda 0.51238 0.13399 3.82 0.00035 ***
## c 0.01970 0.00912 2.16 0.03520 *
## omega 11.31149 2.51653 4.49 0.000038 ***
## miu -0.31904 0.11971 -2.66 0.01018 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0213 on 53 degrees of freedom
## Multiple R-squared: 0.449, Adjusted R-squared: 0.397
## F-statistic: 8.65 on 5 and 53 DF, p-value: 4.81e-06
# Final model for alpha: mod_alpha_4, the reduced model with log(days).
# In its summary above every coefficient has p < 0.01; its LOOCV RMSE
# (0.0257) is close to the lowest among the six candidates (0.0250).
mod_alpha = mod_alpha_4
# Diagnostic plots for the selected model are currently disabled.
# diagnostics(mod_alpha, testit = FALSE)
omega
# full additive model: omega regressed on every other column of data_trn.
# The fit is rank-deficient: sigma, Hospitals, ICU_beds,
# HospParticipatinginNetwork2017 and the five policy columns
# (stay_at_home ... entertainment_gym) come back NA in the summary below,
# which is why predict() warns during the evaluation.
mod_omega_full = lm(omega ~ ., data = data_trn)
# k is forwarded to test_mod (defined earlier in the file).
# NOTE(review): k looks like a per-response index (5 for alpha, 6 for omega,
# 7 for miu) -- confirm against the definition of test_mod.
test_mod(mod_omega_full, k = 6)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0014028 0.5044044 0.0275331 0.3528000 29.0000000 0.0007156 Inf
summary(mod_omega_full)
##
## Call:
## lm(formula = omega ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0017815 -0.0003972 -0.0000795 0.0003670 0.0016757
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.58e-02 2.82e-02 -1.27 0.21271
## k 8.68e-04 9.69e-04 0.89 0.37630
## sigma NA NA NA NA
## lamda 1.81e-03 9.66e-03 0.19 0.85212
## c 1.94e-04 5.30e-04 0.37 0.71646
## alpha 1.54e-02 6.51e-03 2.36 0.02342 *
## miu 1.17e-03 1.29e-02 0.09 0.92811
## cases -5.02e-09 1.00e-08 -0.50 0.61947
## deaths -1.17e-07 1.16e-07 -1.01 0.31698
## recovered 5.32e-08 3.95e-08 1.35 0.18586
## POP_LATITUDE 3.95e-04 3.60e-04 1.10 0.27871
## POP_LONGITUDE -1.93e-04 1.20e-04 -1.61 0.11503
## PopulationEstimate2018 2.40e-07 2.05e-07 1.17 0.24989
## PopTotalMale2017 -4.57e-07 3.94e-07 -1.16 0.25348
## PopulationEstimate_above65_2017 -1.61e-07 1.26e-07 -1.28 0.20858
## PopulationDensityperSqMile2010 8.04e-07 8.54e-07 0.94 0.35190
## DiabetesPercentage 1.99e-03 1.43e-03 1.39 0.17223
## Smokers_Percentage -6.98e-04 6.02e-04 -1.16 0.25337
## HeartDiseaseMortality 5.72e-05 6.28e-05 0.91 0.36748
## StrokeMortality -3.36e-04 3.10e-04 -1.09 0.28441
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days -3.03e-05 8.08e-06 -3.75 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000834 on 39 degrees of freedom
## Multiple R-squared: 0.667, Adjusted R-squared: 0.504
## F-statistic: 4.11 on 19 and 39 DF, p-value: 0.0000931
# Smallest candidate: omega explained by days alone.
mod_omega_1 = lm(
  omega ~ days,
  data = data_trn
)
# Large additive candidate: every column except sigma and the eight
# hospital/policy columns that are NA (aliased) in mod_omega_full, so the
# estimated coefficients match those of the full model.
mod_omega_2 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)
test_mod(mod_omega_1, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0010009 0.3186830 0.0578837 0.0002535 2.0000000 0.0009460 Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0014028 0.5044044 0.0275331 0.3528000 20.0000000 0.0007156 Inf
summary(mod_omega_1)
##
## Call:
## lm(formula = omega ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.001438 -0.000550 -0.000128 0.000312 0.003806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.52e-03 2.87e-04 8.78 3.5e-12 ***
## days -2.69e-05 5.07e-06 -5.30 1.9e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000977 on 57 degrees of freedom
## Multiple R-squared: 0.33, Adjusted R-squared: 0.319
## F-statistic: 28.1 on 1 and 57 DF, p-value: 1.92e-06
summary(mod_omega_2)
##
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0017815 -0.0003972 -0.0000795 0.0003670 0.0016757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.58e-02 2.82e-02 -1.27 0.21271
## k 8.68e-04 9.69e-04 0.89 0.37630
## lamda 1.81e-03 9.66e-03 0.19 0.85212
## c 1.94e-04 5.30e-04 0.37 0.71646
## alpha 1.54e-02 6.51e-03 2.36 0.02342 *
## miu 1.17e-03 1.29e-02 0.09 0.92811
## cases -5.02e-09 1.00e-08 -0.50 0.61947
## deaths -1.17e-07 1.16e-07 -1.01 0.31698
## recovered 5.32e-08 3.95e-08 1.35 0.18586
## POP_LATITUDE 3.95e-04 3.60e-04 1.10 0.27871
## POP_LONGITUDE -1.93e-04 1.20e-04 -1.61 0.11503
## PopulationEstimate2018 2.40e-07 2.05e-07 1.17 0.24989
## PopTotalMale2017 -4.57e-07 3.94e-07 -1.16 0.25348
## PopulationEstimate_above65_2017 -1.61e-07 1.26e-07 -1.28 0.20858
## PopulationDensityperSqMile2010 8.04e-07 8.54e-07 0.94 0.35190
## DiabetesPercentage 1.99e-03 1.43e-03 1.39 0.17223
## Smokers_Percentage -6.98e-04 6.02e-04 -1.16 0.25337
## HeartDiseaseMortality 5.72e-05 6.28e-05 0.91 0.36748
## StrokeMortality -3.36e-04 3.10e-04 -1.09 0.28441
## days -3.03e-05 8.08e-06 -3.75 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000834 on 39 degrees of freedom
## Multiple R-squared: 0.667, Adjusted R-squared: 0.504
## F-statistic: 4.11 on 19 and 39 DF, p-value: 0.0000931
# Intermediate candidates for omega. Both start from the mod_omega_2 term
# list, drop cases/deaths/recovered and population density, and replace
# days with log(days).
#
# Two formula quirks are kept verbatim so the printed Call is unchanged:
#   * `- omega` has no effect here, because omega is the response and `.`
#     never expands to it;
#   * `+ + I(...)` is a binary plus followed by a harmless unary plus.
#
# mod_omega_3: additionally drops the two population totals, puts
# stay_at_home back in and adds squared latitude/longitude terms.
mod_omega_3 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE ^ 2) + + I(POP_LONGITUDE ^ 2),
  data = data_trn
)
# mod_omega_4: the same reduction without the stay_at_home and quadratic
# terms; the two population totals stay in.
mod_omega_4 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)
test_mod(mod_omega_1, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0010009 0.3186830 0.0578837 0.0002535 2.0000000 0.0009460 Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0014028 0.5044044 0.0275331 0.3528000 20.0000000 0.0007156 Inf
test_mod(mod_omega_3, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.001163 0.604801 0.030730 0.350808 17.000000 0.000540 Inf
test_mod(mod_omega_4, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0011062 0.6139613 0.0219400 0.3405360 16.0000000 0.0005402 Inf
summary(mod_omega_3)
##
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0016894 -0.0004084 -0.0000319 0.0003170 0.0016192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.49e+00 2.55e+00 -1.37 0.18
## k 1.31e-03 8.75e-04 1.49 0.14
## lamda 8.57e-03 8.79e-03 0.98 0.33
## c 4.17e-04 4.71e-04 0.89 0.38
## alpha 7.54e-03 5.90e-03 1.28 0.21
## miu 5.88e-03 1.06e-02 0.56 0.58
## POP_LATITUDE 6.49e-02 4.70e-02 1.38 0.17
## POP_LONGITUDE -4.70e-02 3.46e-02 -1.36 0.18
## PopulationEstimate_above65_2017 5.07e-07 4.04e-07 1.25 0.22
## DiabetesPercentage 2.69e-02 1.89e-02 1.42 0.16
## Smokers_Percentage -1.43e-02 1.01e-02 -1.41 0.17
## HeartDiseaseMortality 2.89e-04 1.74e-04 1.66 0.10
## StrokeMortality -1.45e-03 8.64e-04 -1.68 0.10
## log(days) -1.26e-03 1.94e-04 -6.51 7.2e-08 ***
## stay_at_home -3.61e-03 2.73e-03 -1.32 0.19
## I(POP_LATITUDE^2) -8.15e-04 5.92e-04 -1.38 0.18
## I(POP_LONGITUDE^2) -2.45e-04 1.81e-04 -1.35 0.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000744 on 42 degrees of freedom
## Multiple R-squared: 0.714, Adjusted R-squared: 0.605
## F-statistic: 6.55 on 16 and 42 DF, p-value: 4.96e-07
summary(mod_omega_4)
##
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0016885 -0.0004072 -0.0000307 0.0003192 0.0016125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.34e-02 2.43e-02 -1.78 0.081 .
## k 1.31e-03 8.61e-04 1.52 0.136
## lamda 8.58e-03 8.68e-03 0.99 0.328
## c 4.14e-04 4.62e-04 0.90 0.375
## alpha 7.45e-03 5.62e-03 1.32 0.192
## miu 6.04e-03 1.01e-02 0.60 0.552
## POP_LATITUDE 5.60e-04 3.01e-04 1.86 0.070 .
## POP_LONGITUDE -2.34e-04 1.04e-04 -2.24 0.030 *
## PopulationEstimate2018 3.04e-07 1.81e-07 1.68 0.101
## PopTotalMale2017 -5.80e-07 3.47e-07 -1.67 0.102
## PopulationEstimate_above65_2017 -1.94e-07 1.05e-07 -1.85 0.071 .
## DiabetesPercentage 2.61e-03 1.15e-03 2.26 0.029 *
## Smokers_Percentage -1.08e-03 4.35e-04 -2.49 0.017 *
## HeartDiseaseMortality 9.78e-05 4.00e-05 2.44 0.019 *
## StrokeMortality -5.00e-04 2.06e-04 -2.43 0.019 *
## log(days) -1.26e-03 1.89e-04 -6.69 3.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000736 on 43 degrees of freedom
## Multiple R-squared: 0.714, Adjusted R-squared: 0.614
## F-statistic: 7.15 on 15 and 43 DF, p-value: 1.79e-07
# Two comparison models for omega (labelled "relatively bad" in the notes).
#
# mod_omega_5: leaves out the other SEIR parameters (k, lamda, c, alpha, miu)
# and keeps the case counts, the demographic/health columns and days.
mod_omega_5 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + alpha + miu),
  data = data_trn
)
# mod_omega_6: the complement, using only the other SEIR parameters.
mod_omega_6 = lm(omega ~ k + lamda + c + alpha + miu, data = data_trn)
# Evaluate every omega candidate again, starting from mod_omega_1, so the
# metrics of all six models can be read side by side.
test_mod(mod_omega_1, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0010009 0.3186830 0.0578837 0.0002535 2.0000000 0.0009460 Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0014028 0.5044044 0.0275331 0.3528000 20.0000000 0.0007156 Inf
test_mod(mod_omega_3, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.001163 0.604801 0.030730 0.350808 17.000000 0.000540 Inf
test_mod(mod_omega_4, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0011062 0.6139613 0.0219400 0.3405360 16.0000000 0.0005402 Inf
test_mod(mod_omega_5, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0011632 0.4042799 0.0001611 0.0260695 15.0000000 0.0007283 Inf
test_mod(mod_omega_6, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.00150272 0.29957618 0.18905170 0.00009094 6.00000000 0.00085551 Inf
summary(mod_omega_5)
##
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (k + lamda +
## c + alpha + miu), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0023595 -0.0003765 0.0000225 0.0003963 0.0029578
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.88e-02 2.17e-02 -2.25 0.02941 *
## cases -7.38e-09 9.90e-09 -0.74 0.46038
## deaths -4.70e-08 1.19e-07 -0.39 0.69477
## recovered 4.38e-08 3.70e-08 1.19 0.24185
## POP_LATITUDE 6.03e-04 2.81e-04 2.14 0.03756 *
## POP_LONGITUDE -2.47e-04 9.36e-05 -2.64 0.01135 *
## PopulationEstimate2018 3.28e-07 1.51e-07 2.17 0.03570 *
## PopTotalMale2017 -6.24e-07 2.90e-07 -2.15 0.03699 *
## PopulationEstimate_above65_2017 -2.05e-07 1.03e-07 -1.99 0.05320 .
## PopulationDensityperSqMile2010 1.35e-07 7.81e-07 0.17 0.86377
## DiabetesPercentage 2.79e-03 1.17e-03 2.38 0.02158 *
## Smokers_Percentage -1.06e-03 5.09e-04 -2.09 0.04219 *
## HeartDiseaseMortality 9.58e-05 5.64e-05 1.70 0.09642 .
## StrokeMortality -5.11e-04 2.71e-04 -1.89 0.06559 .
## days -3.11e-05 8.24e-06 -3.78 0.00047 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000914 on 44 degrees of freedom
## Multiple R-squared: 0.548, Adjusted R-squared: 0.404
## F-statistic: 3.81 on 14 and 44 DF, p-value: 0.000331
summary(mod_omega_6)
##
## Call:
## lm(formula = omega ~ k + lamda + c + alpha + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.001569 -0.000517 -0.000270 0.000534 0.002377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.001837 0.000904 2.03 0.047 *
## k -0.001094 0.000741 -1.48 0.146
## lamda -0.005744 0.006985 -0.82 0.415
## c -0.000374 0.000439 -0.85 0.398
## alpha 0.024399 0.005428 4.49 0.000038 ***
## miu 0.010868 0.005730 1.90 0.063 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000991 on 53 degrees of freedom
## Multiple R-squared: 0.36, Adjusted R-squared: 0.3
## F-statistic: 5.96 on 5 and 53 DF, p-value: 0.000191
# Final model for omega: mod_omega_4, the reduced model with log(days).
# Of the six candidates it has the highest adjusted R^2 (0.614) and a test
# RMSE of 0.00054, on par with mod_omega_3.
mod_omega = mod_omega_4
# Diagnostic plots for the selected model are currently disabled.
# diagnostics(mod_omega, testit = FALSE)
miu
# full additive model: miu regressed on every other column of data_trn.
# The fit is rank-deficient: sigma, Hospitals, ICU_beds,
# HospParticipatinginNetwork2017 and the five policy columns
# (stay_at_home ... entertainment_gym) come back NA in the summary below,
# which is why predict() warns during the evaluation.
mod_miu_full = lm(miu ~ ., data = data_trn)
# k is forwarded to test_mod (defined earlier in the file).
# NOTE(review): k looks like a per-response index (5 for alpha, 6 for omega,
# 7 for miu) -- confirm against the definition of test_mod.
test_mod(mod_miu_full, k = 7)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.017161 0.894881 0.352920 0.928797 29.000000 0.009206 41.837402
summary(mod_miu_full)
##
## Call:
## lm(formula = miu ~ ., data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.018275 -0.005736 -0.000703 0.005370 0.019586
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.22e+00 2.99e-01 -4.08 0.00022 ***
## k 2.53e-02 1.14e-02 2.21 0.03272 *
## sigma NA NA NA NA
## lamda 4.32e-01 9.79e-02 4.41 7.8e-05 ***
## c 2.56e-02 5.15e-03 4.96 1.4e-05 ***
## alpha -8.31e-02 8.52e-02 -0.98 0.33538
## omega 1.80e-01 1.98e+00 0.09 0.92811
## cases -1.75e-07 1.22e-07 -1.44 0.15723
## deaths -1.08e-06 1.44e-06 -0.75 0.45731
## recovered 1.19e-06 4.63e-07 2.57 0.01415 *
## POP_LATITUDE 1.44e-02 3.89e-03 3.71 0.00065 ***
## POP_LONGITUDE -4.99e-03 1.31e-03 -3.82 0.00046 ***
## PopulationEstimate2018 1.01e-05 2.02e-06 5.03 1.1e-05 ***
## PopTotalMale2017 -1.96e-05 3.85e-06 -5.09 9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06 1.40e-06 -3.38 0.00165 **
## PopulationDensityperSqMile2010 2.30e-05 1.00e-05 2.29 0.02739 *
## DiabetesPercentage 4.43e-02 1.68e-02 2.64 0.01188 *
## Smokers_Percentage -1.10e-02 7.39e-03 -1.49 0.14522
## HeartDiseaseMortality 6.60e-04 7.79e-04 0.85 0.40224
## StrokeMortality -4.27e-03 3.84e-03 -1.11 0.27299
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days -1.30e-04 1.15e-04 -1.13 0.26392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared: 0.929, Adjusted R-squared: 0.895
## F-statistic: 27 on 19 and 39 DF, p-value: <2e-16
# Smallest candidate: miu explained by days alone.
mod_miu_1 = lm(
  miu ~ days,
  data = data_trn
)
# Large additive candidate: every column except sigma and the eight
# hospital/policy columns that are NA (aliased) in mod_miu_full, so the
# estimated coefficients match those of the full model.
mod_miu_2 = lm(
  miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)
test_mod(mod_miu_1, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.032721 -0.015921 0.032293 0.000945 2.000000 0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.017161 0.894881 0.352920 0.928797 20.000000 0.009206 41.837402
summary(mod_miu_1)
##
## Call:
## lm(formula = miu ~ days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.04092 -0.02621 -0.00039 0.01751 0.10555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0363221 0.0094359 3.85 0.0003 ***
## days 0.0000503 0.0001666 0.30 0.7639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0321 on 57 degrees of freedom
## Multiple R-squared: 0.0016, Adjusted R-squared: -0.0159
## F-statistic: 0.0911 on 1 and 57 DF, p-value: 0.764
summary(mod_miu_2)
##
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.018275 -0.005736 -0.000703 0.005370 0.019586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.22e+00 2.99e-01 -4.08 0.00022 ***
## k 2.53e-02 1.14e-02 2.21 0.03272 *
## lamda 4.32e-01 9.79e-02 4.41 7.8e-05 ***
## c 2.56e-02 5.15e-03 4.96 1.4e-05 ***
## alpha -8.31e-02 8.52e-02 -0.98 0.33538
## omega 1.80e-01 1.98e+00 0.09 0.92811
## cases -1.75e-07 1.22e-07 -1.44 0.15723
## deaths -1.08e-06 1.44e-06 -0.75 0.45731
## recovered 1.19e-06 4.63e-07 2.57 0.01415 *
## POP_LATITUDE 1.44e-02 3.89e-03 3.71 0.00065 ***
## POP_LONGITUDE -4.99e-03 1.31e-03 -3.82 0.00046 ***
## PopulationEstimate2018 1.01e-05 2.02e-06 5.03 1.1e-05 ***
## PopTotalMale2017 -1.96e-05 3.85e-06 -5.09 9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06 1.40e-06 -3.38 0.00165 **
## PopulationDensityperSqMile2010 2.30e-05 1.00e-05 2.29 0.02739 *
## DiabetesPercentage 4.43e-02 1.68e-02 2.64 0.01188 *
## Smokers_Percentage -1.10e-02 7.39e-03 -1.49 0.14522
## HeartDiseaseMortality 6.60e-04 7.79e-04 0.85 0.40224
## StrokeMortality -4.27e-03 3.84e-03 -1.11 0.27299
## days -1.30e-04 1.15e-04 -1.13 0.26392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared: 0.929, Adjusted R-squared: 0.895
## F-statistic: 27 on 19 and 39 DF, p-value: <2e-16
# Intermediate candidates for miu. Both start from the mod_miu_2 term list,
# drop omega, cases/deaths/recovered and population density, and replace
# days with log(days).
#
# The `+ + I(...)` below is a binary plus followed by a harmless unary plus;
# it is kept verbatim so the printed Call is unchanged.
#
# mod_miu_3: additionally drops the two population totals, puts stay_at_home
# back in and adds squared latitude/longitude terms.
mod_miu_3 = lm(
  miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE ^ 2) + + I(POP_LONGITUDE ^ 2),
  data = data_trn
)
# mod_miu_4: the same reduction without the stay_at_home and quadratic
# terms; the two population totals stay in.
mod_miu_4 = lm(
  miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)
test_mod(mod_miu_1, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.032721 -0.015921 0.032293 0.000945 2.000000 0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.017161 0.894881 0.352920 0.928797 20.000000 0.009206 41.837402
test_mod(mod_miu_3, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01807 0.88667 0.20086 0.93226 16.00000 0.01229 47.47454
test_mod(mod_miu_4, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01703 0.88089 0.19835 0.83854 15.00000 0.01365 69.23757
summary(mod_miu_3)
##
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.020163 -0.005322 -0.000809 0.005689 0.023672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.56e+02 2.80e+01 -5.58 1.5e-06 ***
## k 3.14e-02 1.17e-02 2.69 0.010 *
## lamda 5.46e-01 9.54e-02 5.73 9.2e-07 ***
## c 3.01e-02 5.01e-03 6.01 3.5e-07 ***
## alpha -1.62e-01 8.13e-02 -1.99 0.053 .
## POP_LATITUDE 2.87e+00 5.16e-01 5.56 1.6e-06 ***
## POP_LONGITUDE -2.12e+00 3.81e-01 -5.56 1.6e-06 ***
## PopulationEstimate_above65_2017 2.44e-05 4.49e-06 5.44 2.4e-06 ***
## DiabetesPercentage 1.16e+00 2.08e-01 5.60 1.4e-06 ***
## Smokers_Percentage -6.18e-01 1.11e-01 -5.56 1.6e-06 ***
## HeartDiseaseMortality 1.07e-02 1.91e-03 5.59 1.4e-06 ***
## StrokeMortality -5.26e-02 9.53e-03 -5.52 1.8e-06 ***
## log(days) -2.21e-03 2.77e-03 -0.80 0.429
## stay_at_home -1.65e-01 3.02e-02 -5.45 2.3e-06 ***
## I(POP_LATITUDE^2) -3.61e-02 6.51e-03 -5.55 1.7e-06 ***
## I(POP_LONGITUDE^2) -1.10e-02 1.99e-03 -5.56 1.6e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0107 on 43 degrees of freedom
## Multiple R-squared: 0.916, Adjusted R-squared: 0.887
## F-statistic: 31.3 on 15 and 43 DF, p-value: <2e-16
summary(mod_miu_4)
##
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.02263 -0.00679 -0.00071 0.00571 0.02444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.77e+00 2.47e-01 -7.17 6.5e-09 ***
## k 3.60e-02 1.17e-02 3.08 0.0035 **
## lamda 5.93e-01 9.41e-02 6.30 1.2e-07 ***
## c 3.07e-02 5.12e-03 6.00 3.3e-07 ***
## alpha -2.16e-01 7.75e-02 -2.78 0.0079 **
## POP_LATITUDE 2.26e-02 2.95e-03 7.67 1.2e-09 ***
## POP_LONGITUDE -7.10e-03 1.14e-03 -6.24 1.5e-07 ***
## PopulationEstimate2018 1.34e-05 1.81e-06 7.41 2.9e-09 ***
## PopTotalMale2017 -2.58e-05 3.45e-06 -7.47 2.3e-09 ***
## PopulationEstimate_above65_2017 -6.78e-06 1.19e-06 -5.69 9.6e-07 ***
## DiabetesPercentage 7.36e-02 1.32e-02 5.56 1.5e-06 ***
## Smokers_Percentage -2.62e-02 5.16e-03 -5.08 7.3e-06 ***
## HeartDiseaseMortality 2.37e-03 4.79e-04 4.96 1.1e-05 ***
## StrokeMortality -1.15e-02 2.55e-03 -4.51 4.8e-05 ***
## log(days) -3.23e-03 2.78e-03 -1.16 0.2514
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.011 on 44 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.881
## F-statistic: 31.6 on 14 and 44 DF, p-value: <2e-16
# Two comparison models for miu (labelled "relatively bad" in the notes).
#
# mod_miu_5: leaves out the other SEIR parameters (k, lamda, c, alpha, omega)
# and keeps the case counts, the demographic/health columns and days.
mod_miu_5 = lm(
  miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + alpha + omega),
  data = data_trn
)
# mod_miu_6: the complement, using only the other SEIR parameters.
mod_miu_6 = lm(miu ~ k + lamda + c + alpha + omega, data = data_trn)
# Evaluate every miu candidate again, starting from mod_miu_1, so the
# metrics of all six models can be read side by side.
test_mod(mod_miu_1, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.032721 -0.015921 0.032293 0.000945 2.000000 0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.017161 0.894881 0.352920 0.928797 20.000000 0.009206 41.837402
test_mod(mod_miu_3, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01807 0.88667 0.20086 0.93226 16.00000 0.01229 47.47454
test_mod(mod_miu_4, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01703 0.88089 0.19835 0.83854 15.00000 0.01365 69.23757
test_mod(mod_miu_5, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.016233 0.832344 0.263518 0.454574 15.000000 0.008545 38.753492
test_mod(mod_miu_6, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0272487 0.4796789 0.5420675 0.0002727 6.0000000 0.0187462 86.0715224
summary(mod_miu_5)
##
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - (k + lamda +
## c + alpha + omega), data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03007 -0.00653 -0.00129 0.00774 0.02857
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.07e+00 3.09e-01 -3.46 0.00122 **
## cases -4.11e-07 1.41e-07 -2.91 0.00571 **
## deaths 2.74e-08 1.70e-06 0.02 0.98721
## recovered 1.90e-06 5.28e-07 3.59 0.00082 ***
## POP_LATITUDE 1.26e-02 4.01e-03 3.15 0.00294 **
## POP_LONGITUDE -4.47e-03 1.34e-03 -3.34 0.00170 **
## PopulationEstimate2018 9.10e-06 2.16e-06 4.21 0.00012 ***
## PopTotalMale2017 -1.78e-05 4.14e-06 -4.28 0.000098 ***
## PopulationEstimate_above65_2017 -3.48e-06 1.47e-06 -2.36 0.02253 *
## PopulationDensityperSqMile2010 2.61e-05 1.12e-05 2.34 0.02369 *
## DiabetesPercentage 3.37e-02 1.68e-02 2.01 0.05055 .
## Smokers_Percentage -6.10e-03 7.26e-03 -0.84 0.40531
## HeartDiseaseMortality 2.01e-04 8.05e-04 0.25 0.80393
## StrokeMortality -1.14e-03 3.86e-03 -0.29 0.77024
## days -8.34e-05 1.18e-04 -0.71 0.48179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0131 on 44 degrees of freedom
## Multiple R-squared: 0.873, Adjusted R-squared: 0.832
## F-statistic: 21.6 on 14 and 44 DF, p-value: 3.44e-15
summary(mod_miu_6)
##
## Call:
## lm(formula = miu ~ k + lamda + c + alpha + omega, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03282 -0.01332 -0.00260 0.00867 0.07146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04172 0.02101 -1.99 0.052 .
## k 0.01310 0.01744 0.75 0.456
## lamda 0.78055 0.12288 6.35 5.0e-08 ***
## c 0.04364 0.00831 5.25 2.7e-06 ***
## alpha -0.37039 0.13898 -2.66 0.010 *
## omega 5.84918 3.08374 1.90 0.063 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.023 on 53 degrees of freedom
## Multiple R-squared: 0.525, Adjusted R-squared: 0.48
## F-statistic: 11.7 on 5 and 53 DF, p-value: 1.22e-07
# Final model for miu: mod_miu_2, the large additive model. It has the
# highest adjusted R^2 of the six candidates (0.895).
# NOTE(review): alpha and omega both keep the reduced *_4 model, whereas this
# choice retains omega, cases, deaths and recovered as predictors; mod_miu_5
# also shows lower LOOCV and test RMSE -- confirm the selection is intended.
mod_miu = mod_miu_2
# Diagnostic plots for the selected model are currently disabled.
# diagnostics(mod_miu, testit = FALSE)
summary(mod_k)
##
## Call:
## lm(formula = k ~ lamda + c + alpha + omega + miu, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0289 -0.0445 0.0356 0.0886 0.2567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1002 0.0791 13.90 <2e-16 ***
## lamda -2.7175 1.2218 -2.22 0.030 *
## c -0.1608 0.0772 -2.08 0.042 *
## alpha 2.0142 1.1260 1.79 0.079 .
## omega -36.1277 24.4670 -1.48 0.146
## miu 0.8043 1.0704 0.75 0.456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.18 on 53 degrees of freedom
## Multiple R-squared: 0.162, Adjusted R-squared: 0.0833
## F-statistic: 2.05 on 5 and 53 DF, p-value: 0.0859
summary(mod_lamda)
##
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03373 -0.00652 -0.00214 0.00673 0.02291
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.63e+00 3.43e-01 4.74 2.3e-05 ***
## k -6.72e-02 1.10e-02 -6.11 2.3e-07 ***
## c -4.42e-02 4.46e-03 -9.92 8.6e-13 ***
## alpha 4.18e-01 7.46e-02 5.59 1.3e-06 ***
## miu 8.00e-01 1.27e-01 6.30 1.2e-07 ***
## POP_LATITUDE -2.07e-02 4.19e-03 -4.95 1.1e-05 ***
## POP_LONGITUDE 6.72e-03 1.50e-03 4.46 5.5e-05 ***
## PopulationEstimate2018 -1.16e-05 2.61e-06 -4.46 5.6e-05 ***
## PopTotalMale2017 2.21e-05 5.02e-06 4.41 6.6e-05 ***
## PopulationEstimate_above65_2017 7.13e-06 1.47e-06 4.84 1.6e-05 ***
## DiabetesPercentage -7.93e-02 1.61e-02 -4.93 1.2e-05 ***
## Smokers_Percentage 2.99e-02 6.06e-03 4.93 1.2e-05 ***
## HeartDiseaseMortality -2.60e-03 5.73e-04 -4.53 4.5e-05 ***
## StrokeMortality 1.45e-02 2.83e-03 5.13 6.3e-06 ***
## log(days) 7.73e-03 3.06e-03 2.53 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0128 on 44 degrees of freedom
## Multiple R-squared: 0.87, Adjusted R-squared: 0.828
## F-statistic: 21 on 14 and 44 DF, p-value: 5.59e-15
# Print the coefficient table and fit statistics of the c model.
# NOTE(review): the Call printed below first removes `stay_at_home` and later
# adds it back (it does appear in the coefficient table), and it contains a
# doubled `+ +` before I(POP_LONGITUDE^2); the formula where mod_c is defined
# could probably be simplified -- confirm at the definition site.
summary(mod_c)
##
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days + stay_at_home - PopulationEstimate2018 -
## PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2),
## data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5448 -0.1371 -0.0494 0.1446 0.4962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.85e+03 7.01e+02 4.07 0.00020 ***
## k -1.08e+00 2.31e-01 -4.67 2.9e-05 ***
## lamda -1.53e+01 1.62e+00 -9.43 4.9e-12 ***
## alpha 5.92e+00 1.68e+00 3.52 0.00103 **
## miu 1.52e+01 2.52e+00 6.01 3.5e-07 ***
## POP_LATITUDE -5.29e+01 1.29e+01 -4.10 0.00018 ***
## POP_LONGITUDE 3.86e+01 9.54e+00 4.04 0.00021 ***
## PopulationEstimate_above65_2017 -4.41e-04 1.12e-04 -3.93 0.00030 ***
## DiabetesPercentage -2.13e+01 5.20e+00 -4.10 0.00018 ***
## Smokers_Percentage 1.14e+01 2.78e+00 4.09 0.00019 ***
## HeartDiseaseMortality -2.00e-01 4.74e-02 -4.22 0.00013 ***
## StrokeMortality 1.01e+00 2.33e-01 4.33 8.7e-05 ***
## log(days) 1.40e-01 5.88e-02 2.39 0.02155 *
## stay_at_home 3.00e+00 7.54e-01 3.97 0.00027 ***
## I(POP_LATITUDE^2) 6.65e-01 1.62e-01 4.09 0.00018 ***
## I(POP_LONGITUDE^2) 2.01e-01 4.97e-02 4.04 0.00022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.241 on 43 degrees of freedom
## Multiple R-squared: 0.748, Adjusted R-squared: 0.661
## F-statistic: 8.52 on 15 and 43 DF, p-value: 1.51e-08
# Print the coefficient table and fit statistics of the alpha model.
summary(mod_alpha)
##
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03305 -0.01194 0.00035 0.00802 0.05910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.93e+00 5.82e-01 -3.32 0.00180 **
## k 8.67e-02 1.90e-02 4.56 4.1e-05 ***
## lamda 9.95e-01 1.78e-01 5.59 1.3e-06 ***
## c 4.35e-02 1.05e-02 4.14 0.00015 ***
## miu -6.93e-01 2.49e-01 -2.78 0.00794 **
## POP_LATITUDE 2.47e-02 7.17e-03 3.44 0.00127 **
## POP_LONGITUDE -8.54e-03 2.49e-03 -3.43 0.00131 **
## PopulationEstimate2018 1.49e-05 4.30e-06 3.46 0.00121 **
## PopTotalMale2017 -2.85e-05 8.26e-06 -3.45 0.00125 **
## PopulationEstimate_above65_2017 -8.49e-06 2.51e-06 -3.39 0.00150 **
## DiabetesPercentage 1.01e-01 2.69e-02 3.77 0.00048 ***
## Smokers_Percentage -3.85e-02 1.01e-02 -3.81 0.00043 ***
## HeartDiseaseMortality 3.11e-03 9.64e-04 3.22 0.00239 **
## StrokeMortality -1.83e-02 4.79e-03 -3.81 0.00042 ***
## log(days) -1.66e-02 4.39e-03 -3.78 0.00047 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0197 on 44 degrees of freedom
## Multiple R-squared: 0.609, Adjusted R-squared: 0.485
## F-statistic: 4.91 on 14 and 44 DF, p-value: 0.0000239
# Print the coefficient table and fit statistics of the omega model.
# NOTE(review): the Call printed below lists `- omega` on the right-hand side
# although omega is the response; this looks like a leftover from the formula
# shared with the other models and is presumably redundant -- confirm at the
# definition site.
summary(mod_omega)
##
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma - cases -
## deaths - omega - recovered - PopulationDensityperSqMile2010 +
## log(days) - days, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0016885 -0.0004072 -0.0000307 0.0003192 0.0016125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.34e-02 2.43e-02 -1.78 0.081 .
## k 1.31e-03 8.61e-04 1.52 0.136
## lamda 8.58e-03 8.68e-03 0.99 0.328
## c 4.14e-04 4.62e-04 0.90 0.375
## alpha 7.45e-03 5.62e-03 1.32 0.192
## miu 6.04e-03 1.01e-02 0.60 0.552
## POP_LATITUDE 5.60e-04 3.01e-04 1.86 0.070 .
## POP_LONGITUDE -2.34e-04 1.04e-04 -2.24 0.030 *
## PopulationEstimate2018 3.04e-07 1.81e-07 1.68 0.101
## PopTotalMale2017 -5.80e-07 3.47e-07 -1.67 0.102
## PopulationEstimate_above65_2017 -1.94e-07 1.05e-07 -1.85 0.071 .
## DiabetesPercentage 2.61e-03 1.15e-03 2.26 0.029 *
## Smokers_Percentage -1.08e-03 4.35e-04 -2.49 0.017 *
## HeartDiseaseMortality 9.78e-05 4.00e-05 2.44 0.019 *
## StrokeMortality -5.00e-04 2.06e-04 -2.43 0.019 *
## log(days) -1.26e-03 1.89e-04 -6.69 3.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000736 on 43 degrees of freedom
## Multiple R-squared: 0.714, Adjusted R-squared: 0.614
## F-statistic: 7.15 on 15 and 43 DF, p-value: 1.79e-07
# Print the coefficient table and fit statistics of the miu model
# (mod_miu was assigned from mod_miu_2 earlier in this file).
summary(mod_miu)
##
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 -
## stay_at_home - above_50_gatherings - above_500_gatherings -
## restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.018275 -0.005736 -0.000703 0.005370 0.019586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.22e+00 2.99e-01 -4.08 0.00022 ***
## k 2.53e-02 1.14e-02 2.21 0.03272 *
## lamda 4.32e-01 9.79e-02 4.41 7.8e-05 ***
## c 2.56e-02 5.15e-03 4.96 1.4e-05 ***
## alpha -8.31e-02 8.52e-02 -0.98 0.33538
## omega 1.80e-01 1.98e+00 0.09 0.92811
## cases -1.75e-07 1.22e-07 -1.44 0.15723
## deaths -1.08e-06 1.44e-06 -0.75 0.45731
## recovered 1.19e-06 4.63e-07 2.57 0.01415 *
## POP_LATITUDE 1.44e-02 3.89e-03 3.71 0.00065 ***
## POP_LONGITUDE -4.99e-03 1.31e-03 -3.82 0.00046 ***
## PopulationEstimate2018 1.01e-05 2.02e-06 5.03 1.1e-05 ***
## PopTotalMale2017 -1.96e-05 3.85e-06 -5.09 9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06 1.40e-06 -3.38 0.00165 **
## PopulationDensityperSqMile2010 2.30e-05 1.00e-05 2.29 0.02739 *
## DiabetesPercentage 4.43e-02 1.68e-02 2.64 0.01188 *
## Smokers_Percentage -1.10e-02 7.39e-03 -1.49 0.14522
## HeartDiseaseMortality 6.60e-04 7.79e-04 0.85 0.40224
## StrokeMortality -4.27e-03 3.84e-03 -1.11 0.27299
## days -1.30e-04 1.15e-04 -1.13 0.26392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared: 0.929, Adjusted R-squared: 0.895
## F-statistic: 27 on 19 and 39 DF, p-value: <2e-16
# Evaluate every model with test_mod(). `k` is forwarded to the helpers, which
# use it to pick the response column out of data_trn / data_tst.
# NOTE(review): column 2 is never passed here -- presumably it holds sigma,
# which is not modelled; confirm against the column order of data_trn.
# NOTE(review): perc_err is Inf for mod_c, mod_alpha and mod_omega below.
# get_perc_err() divides by the actual test values, so this is consistent with
# zeros in those test columns -- confirm.
test_mod(mod_k, k = 1)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 2.135e-01 8.328e-02 1.932e-02 7.076e-10 6.000e+00 5.237e-02 5.250e+00
test_mod(mod_lamda, k = 3)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.01745 0.82845 0.19372 0.01659 15.00000 0.01620 24.31619
test_mod(mod_c, k = 4)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.3285 0.6605 0.2078 0.1439 16.0000 0.2592 Inf
test_mod(mod_alpha, k = 5)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.025671 0.485239 0.007565 0.019305 15.000000 0.023262 Inf
test_mod(mod_omega, k = 6)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.0011062 0.6139613 0.0219400 0.3405360 16.0000000 0.0005402 Inf
test_mod(mod_miu, k = 7)
## loocv_rmse adj_r2 bp_pval.BP sw_pval num_params test_rmse perc_err
## 0.017161 0.894881 0.352920 0.928797 20.000000 0.009206 41.837402
# Draw the diagnostic plots (no Shapiro-Wilk output) for every fitted model,
# in the same order as before: k, lamda, c, alpha, omega, miu.
invisible(lapply(
  list(mod_k, mod_lamda, mod_c, mod_alpha, mod_omega, mod_miu),
  diagnostics,
  testit = FALSE
))
See project Homepage.
# Breusch-Pagan decision label for a fitted model.
#
# model: fitted model object accepted by bptest().
# alpha: significance level the p-value is compared against.
# Returns "Reject" when the bptest() p-value is below `alpha`,
# otherwise "Fail to Reject".
get_bp_decision <- function(model, alpha) {
  bp_p_value <- bptest(model)$p.value
  # bptest() names its p-value; strip the name so the label comes back unnamed.
  is_rejected <- unname(bp_p_value < alpha)
  ifelse(test = is_rejected, yes = "Reject", no = "Fail to Reject")
}
# P-value of the Breusch-Pagan test (bptest()) for a fitted model.
# The value is returned exactly as stored in the test result, including
# any name it carries.
get_bp_pval <- function(model) {
  bp_result <- bptest(model)
  bp_result[["p.value"]]
}
# Shapiro-Wilk decision label for the residuals of a fitted model.
#
# model: fitted model whose residuals are tested.
# alpha: significance level the p-value is compared against.
# Returns "Reject" when the Shapiro-Wilk p-value is below `alpha`,
# otherwise "Fail to Reject".
get_sw_decision <- function(model, alpha) {
  sw_p_value <- shapiro.test(resid(model))$p.value
  is_rejected <- unname(sw_p_value < alpha)
  ifelse(test = is_rejected, yes = "Reject", no = "Fail to Reject")
}
# P-value of the Shapiro-Wilk normality test applied to the residuals
# of a fitted model.
get_sw_pval <- function(model) {
  residual_values <- resid(model)
  shapiro.test(residual_values)[["p.value"]]
}
# Number of coefficients reported by coef() for a fitted model
# (the intercept, when present, is counted).
get_num_params <- function(model) {
  estimated_coefs <- coef(model)
  length(estimated_coefs)
}
# Leave-one-out cross-validated RMSE of a fitted linear model, computed from
# the leverage shortcut (residual / (1 - hat value)) instead of refitting.
#
# model:  fitted model supporting resid(), fitted() and hatvalues().
# is_log: TRUE when the model was fitted to a log-transformed response; the
#         fitted values are then back-transformed with exp() and compared with
#         the untransformed response taken from the global `data_trn`.
# k:      column (index or name) of the response in `data_trn`; only used when
#         `is_log` is TRUE.
# Returns a single numeric value.
get_loocv_rmse <- function(model, is_log, k) {
  leverage_adj <- 1 - hatvalues(model)
  # Plain if/else: `is_log` is a scalar flag, so vectorised ifelse() is not
  # needed to choose the branch.
  if (isTRUE(as.logical(is_log))) {
    # `[[k]]` always yields a plain vector. `data_trn[, k]` stays a one-column
    # table when data_trn is a tibble, which made mean() return NA.
    observed <- data_trn[[k]]
    loo_resid <- (observed - exp(fitted(model))) / leverage_adj
    sqrt(mean(na.omit(loo_resid ^ 2)))
  } else {
    sqrt(mean((resid(model) / leverage_adj) ^ 2))
  }
}
# Adjusted R-squared taken from summary() of a fitted model.
get_adj_r2 <- function(model) {
  model_summary <- summary(model)
  model_summary[["adj.r.squared"]]
}
# Collect the evaluation metrics of one fitted model into a named numeric
# vector: LOOCV RMSE, adjusted R-squared, Breusch-Pagan p-value, Shapiro-Wilk
# p-value, number of coefficients, test RMSE and mean percentage error.
#
# model:  fitted model to evaluate.
# is_log: forwarded to get_loocv_rmse().
# k:      forwarded to the helpers that look up the response column.
#
# The Breusch-Pagan entry keeps whatever name get_bp_pval() returns appended
# to "bp_pval" (printed as `bp_pval.BP` in this file's output).
test_mod <- function(model, is_log = FALSE, k = 1) {
  loocv_rmse_val <- get_loocv_rmse(model, is_log, k)
  adj_r2_val <- get_adj_r2(model)
  bp_pval_val <- get_bp_pval(model)
  sw_pval_val <- get_sw_pval(model)
  num_params_val <- get_num_params(model)
  test_rmse_val <- get_test_rmse(model, k)
  perc_err_val <- get_perc_err(model, k)

  c(
    loocv_rmse = loocv_rmse_val,
    adj_r2 = adj_r2_val,
    bp_pval = bp_pval_val,
    sw_pval = sw_pval_val,
    num_params = num_params_val,
    test_rmse = test_rmse_val,
    perc_err = perc_err_val
  )
}
# Residual diagnostics for a fitted model.
#
# model:  fitted model supporting fitted() and resid().
# pcol:   point colour used in both plots.
# lcol:   colour of the reference line in the normal Q-Q plot.
# alpha:  significance level for the Shapiro-Wilk decision.
# plotit: when TRUE, draw a fitted-vs-residuals plot and a normal Q-Q plot
#         side by side.
# testit: when TRUE, return list(p_val, decision) from a Shapiro-Wilk test on
#         the residuals; otherwise nothing visible is returned.
diagnostics <- function(model, pcol = "grey", lcol = "dodgerblue", alpha = 0.05,
                        plotit = TRUE, testit = TRUE) {
  if (plotit) {
    # Switch to a 1 x 2 square layout and put the caller's graphics settings
    # back when the function exits, so later plots are not affected.
    old_par <- par(mfrow = c(1, 2), pty = "s")
    on.exit(par(old_par), add = TRUE)

    # Honour `pcol` here as well; it used to be hard-coded to "grey", which is
    # also the default, so default output is unchanged.
    plot(fitted(model), resid(model), col = pcol, pch = 20,
         xlab = "Fitted", ylab = "Residual",
         main = "Fitted versus Residuals")
    abline(h = 0, col = "darkorange", lwd = 2)

    qqnorm(resid(model), col = pcol)
    qqline(resid(model), col = lcol, lwd = 2)
  }

  if (testit) {
    # Run the test once and read the p-value by its full name rather than
    # relying on partial matching of `$p`.
    p_val <- shapiro.test(resid(model))$p.value
    list(p_val = p_val,
         decision = if (p_val < alpha) "Reject" else "Fail to Reject")
  }
}
# Root-mean-squared prediction error of `model` on the global test set
# `data_tst`.
#
# model: fitted model usable with predict(model, newdata = data_tst).
# k:     column (index or name) of the observed response in `data_tst`.
# Returns a single numeric value.
get_test_rmse <- function(model, k) {
  # `[[k]]` always yields a plain vector. `data_tst[, k]` stays a one-column
  # table when data_tst is a tibble, which made mean() return NA.
  observed <- data_tst[[k]]
  predicted <- predict(model, newdata = data_tst)
  sqrt(mean((observed - predicted) ^ 2))
}
# Mean absolute percentage error (in percent) of `model` on the global test
# set `data_tst`.
#
# model: fitted model usable with predict(model, newdata = data_tst).
# k:     column (index or name) of the observed response in `data_tst`.
# Returns a single numeric value. Observations whose actual value is 0 make
# the result Inf (or NaN when the prediction is also exactly 0), because the
# percentage error is undefined there.
get_perc_err <- function(model, k) {
  # `[[k]]` always yields a plain vector, also when data_tst is a tibble.
  actual <- data_tst[[k]]
  predicted <- predict(model, newdata = data_tst)
  # Divide by |actual|: dividing by the signed value produced negative
  # "percent errors" for negative actuals, which could cancel positive ones.
  100 * mean(abs(actual - predicted) / abs(actual))
}