Introduction

This is part 3 of the project “Covid-19 Policy Decision Helper”. In this part, we model the relationship between the SEIR model parameters and a variety of social factors, including demographic, medical and policy factors. We aim for models that are interpretable as well as predictive; in other words, we want models that are simple, accessible and easy to interpret, so that readers can gain insight into what matters for the way a pandemic develops, and, among such explainable models, we look for the ones that are most helpful for making predictions.

With these goals in mind, we used a relatively small number of variables in our study: variables that intuitively seemed significant to us, taken from the most accessible open data sources.

The modeling methods we applied include the following:

Data source:

Data Preprocessing

# States kept in the analysis; used below to filter the state-level case data.
StateOfInterest <- c(
  "Arizona", "California", "Minnesota", "New Mexico",
  "New York", "Oklahoma", "South Carolina", "Tennessee",
  "Utah", "Virginia", "West Virginia", "Wisconsin"
)

Demographic & Policy Data (County-Level)

library(readr)
# Read the county-level table from the working directory and print its
# dimensions (rows, columns).
# NOTE(review): the other inputs in this file are read from "../datasets/";
# confirm that this file really lives next to the script.
county_data_abridged = read_csv("county_data_abridged.csv")
dim(county_data_abridged)
## [1] 3244   87
names(county_data_abridged)
##  [1] "countyFIPS"                        "STATEFP"                          
##  [3] "COUNTYFP"                          "CountyName"                       
##  [5] "StateName"                         "State"                            
##  [7] "lat"                               "lon"                              
##  [9] "POP_LATITUDE"                      "POP_LONGITUDE"                    
## [11] "CensusRegionName"                  "CensusDivisionName"               
## [13] "Rural-UrbanContinuumCode2013"      "PopulationEstimate2018"           
## [15] "PopTotalMale2017"                  "PopTotalFemale2017"               
## [17] "FracMale2017"                      "PopulationEstimate65+2017"        
## [19] "PopulationDensityperSqMile2010"    "CensusPopulation2010"             
## [21] "MedianAge2010"                     "#EligibleforMedicare2018"         
## [23] "MedicareEnrollment,AgedTot2017"    "3-YrDiabetes2015-17"              
## [25] "DiabetesPercentage"                "HeartDiseaseMortality"            
## [27] "StrokeMortality"                   "Smokers_Percentage"               
## [29] "RespMortalityRate2014"             "#FTEHospitalTotal2017"            
## [31] "TotalM.D.'s,TotNon-FedandFed2017"  "#HospParticipatinginNetwork2017"  
## [33] "#Hospitals"                        "#ICU_beds"                        
## [35] "dem_to_rep_ratio"                  "PopMale<52010"                    
## [37] "PopFmle<52010"                     "PopMale5-92010"                   
## [39] "PopFmle5-92010"                    "PopMale10-142010"                 
## [41] "PopFmle10-142010"                  "PopMale15-192010"                 
## [43] "PopFmle15-192010"                  "PopMale20-242010"                 
## [45] "PopFmle20-242010"                  "PopMale25-292010"                 
## [47] "PopFmle25-292010"                  "PopMale30-342010"                 
## [49] "PopFmle30-342010"                  "PopMale35-442010"                 
## [51] "PopFmle35-442010"                  "PopMale45-542010"                 
## [53] "PopFmle45-542010"                  "PopMale55-592010"                 
## [55] "PopFmle55-592010"                  "PopMale60-642010"                 
## [57] "PopFmle60-642010"                  "PopMale65-742010"                 
## [59] "PopFmle65-742010"                  "PopMale75-842010"                 
## [61] "PopFmle75-842010"                  "PopMale>842010"                   
## [63] "PopFmle>842010"                    "3-YrMortalityAge<1Year2015-17"    
## [65] "3-YrMortalityAge1-4Years2015-17"   "3-YrMortalityAge5-14Years2015-17" 
## [67] "3-YrMortalityAge15-24Years2015-17" "3-YrMortalityAge25-34Years2015-17"
## [69] "3-YrMortalityAge35-44Years2015-17" "3-YrMortalityAge45-54Years2015-17"
## [71] "3-YrMortalityAge55-64Years2015-17" "3-YrMortalityAge65-74Years2015-17"
## [73] "3-YrMortalityAge75-84Years2015-17" "3-YrMortalityAge85+Years2015-17"  
## [75] "mortality2015-17Estimated"         "stay at home"                     
## [77] ">50 gatherings"                    ">500 gatherings"                  
## [79] "public schools"                    "restaurant dine-in"               
## [81] "entertainment/gym"                 "federal guidelines"               
## [83] "foreign travel ban"                "SVIPercentile"                    
## [85] "HPSAShortage"                      "HPSAServedPop"                    
## [87] "HPSAUnderservedPop"
# Rename the columns whose names are awkward to use in formulas (leading "#",
# "+", ">", "/", "-" or embedded spaces). Entries are old name = new name;
# a column that is not present is simply left untouched.
column_renames <- c(
  "#EligibleforMedicare2018"        = "EligibleforMedicare2018",
  "#FTEHospitalTotal2017"           = "FTEHospitalTotal2017",
  "#HospParticipatinginNetwork2017" = "HospParticipatinginNetwork2017",
  "#Hospitals"                      = "Hospitals",
  "#ICU_beds"                       = "ICU_beds",
  "PopulationEstimate65+2017"       = "PopulationEstimate_above65_2017",
  "stay at home"                    = "stay_at_home",
  ">50 gatherings"                  = "above_50_gatherings",
  ">500 gatherings"                 = "above_500_gatherings",
  "public schools"                  = "public_schools",
  "restaurant dine-in"              = "restaurant_dine_in",
  "entertainment/gym"               = "entertainment_gym",
  "federal guidelines"              = "federal_guidelines",
  "foreign travel ban"              = "foreign_travel_ban"
)
for (old_name in names(column_renames)) {
  is_target <- colnames(county_data_abridged) == old_name
  colnames(county_data_abridged)[is_target] <- column_renames[[old_name]]
}
# Keep the identifier, location, demographic/health, hospital and policy
# columns used in the rest of the analysis, then drop every county that has a
# missing value in any of them.
kept_columns <- c(
  "State", "CountyName", "POP_LATITUDE", "POP_LONGITUDE",
  "PopulationEstimate2018", "PopTotalMale2017",
  "PopulationEstimate_above65_2017", "PopulationDensityperSqMile2010",
  "DiabetesPercentage", "Smokers_Percentage",
  "HeartDiseaseMortality", "StrokeMortality",
  "Hospitals", "ICU_beds", "HospParticipatinginNetwork2017",
  "stay_at_home", "above_50_gatherings", "above_500_gatherings",
  "restaurant_dine_in", "entertainment_gym"
)
data <- county_data_abridged[, kept_columns]
data <- droplevels(na.omit(data))

# Re-base each policy column so that its smallest observed value becomes 0.
policy_columns <- c(
  "stay_at_home", "above_50_gatherings", "above_500_gatherings",
  "restaurant_dine_in", "entertainment_gym"
)
for (policy in policy_columns) {
  data[[policy]] <- data[[policy]] - min(data[[policy]])
}

str(data)
## tibble [2,585 × 20] (S3: tbl_df/tbl/data.frame)
##  $ State                          : chr [1:2585] "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ CountyName                     : chr [1:2585] "Autauga" "Baldwin" "Barbour" "Bibb" ...
##  $ POP_LATITUDE                   : num [1:2585] 32.5 30.5 31.8 33 34 ...
##  $ POP_LONGITUDE                  : num [1:2585] -86.5 -87.8 -85.3 -87.1 -86.6 ...
##  $ PopulationEstimate2018         : num [1:2585] 55601 218022 24881 22400 57840 ...
##  $ PopTotalMale2017               : num [1:2585] 27007 103225 13335 12138 28607 ...
##  $ PopulationEstimate_above65_2017: num [1:2585] 8392 42413 4757 3632 10351 ...
##  $ PopulationDensityperSqMile2010 : num [1:2585] 91.8 114.7 31 36.8 88.9 ...
##  $ DiabetesPercentage             : num [1:2585] 9.9 8.5 15.7 13.3 14.9 22.4 16.9 15.6 17.5 12.2 ...
##  $ Smokers_Percentage             : num [1:2585] 18.1 17.5 22 19.1 19.2 ...
##  $ HeartDiseaseMortality          : num [1:2585] 204 183 220 226 225 ...
##  $ StrokeMortality                : num [1:2585] 56.1 41.9 49 57.2 52.8 54.1 59 44 45.2 47.3 ...
##  $ Hospitals                      : num [1:2585] 1 3 1 1 1 1 1 2 0 1 ...
##  $ ICU_beds                       : num [1:2585] 6 51 5 0 6 0 7 24 0 0 ...
##  $ HospParticipatinginNetwork2017 : num [1:2585] 0 0 0 0 1 0 0 0 0 0 ...
##  $ stay_at_home                   : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
##  $ above_50_gatherings            : num [1:2585] 5 5 5 5 5 5 5 5 5 5 ...
##  $ above_500_gatherings           : num [1:2585] 2 2 2 2 2 2 2 2 2 2 ...
##  $ restaurant_dine_in             : num [1:2585] 7 7 7 7 7 7 7 7 7 7 ...
##  $ entertainment_gym              : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
##  - attr(*, "na.action")= 'omit' Named int [1:659] 68 69 70 71 72 73 74 75 76 77 ...
##   ..- attr(*, "names")= chr [1:659] "68" "69" "70" "71" ...
data_demographic_county = data

Infection Data (State-Level)

Covid-19 Cases

# Load the case time series and reduce it to (state, date, cases, deaths,
# recovered) for the states of interest, with complete numeric counts.
# NOTE(review): the str() output below shows that readr guessed the `state`
# column as logical and recorded parsing problems for it, so that column is
# not usable as read; the state name is therefore derived from `name`.
# Consider passing explicit `col_types` to read_csv().
timeseries <- read_csv("../datasets/timeseries.csv")

# US state-level rows only.
data <- subset(timeseries, country == "United States" & level == "state")

# `name` has the form "<state>, US". Strip only the trailing ", <country>"
# component, so names that contain extra commas ("Washington, D.C., US",
# "Unassigned cases, Iowa, US") cannot misalign the parsing. Those rows never
# match StateOfInterest and are removed by the filter below, so no hand-kept
# exclusion list is needed.
data$state <- sub(", [^,]*$", "", as.character(data$name))

data <- subset(data,
               state %in% StateOfInterest,
               select = c(state, date, cases, deaths, recovered))

data$state     <- as.factor(data$state)
data$cases     <- as.numeric(data$cases)
data$deaths    <- as.numeric(data$deaths)
data$recovered <- as.numeric(data$recovered)

# Drop days on which any of the three counts is missing.
data <- na.omit(data)
data <- droplevels(data)

str(data)
## tibble [1,578 × 5] (S3: tbl_df/tbl/data.frame)
##  $ state    : Factor w/ 12 levels "Arizona","California",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date     : Date[1:1578], format: "2020-04-14" "2020-04-15" ...
##  $ cases    : num [1:1578] 3806 3962 4234 4507 4719 ...
##  $ deaths   : num [1:1578] 131 142 150 169 177 184 187 208 229 249 ...
##  $ recovered: num [1:1578] 249 385 460 539 539 ...
##  - attr(*, "problems")= tibble [1,666,558 × 5] (S3: tbl_df/tbl/data.frame)
##   ..$ row     : int [1:1666558] 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 ...
##   ..$ col     : chr [1:1666558] "state" "state" "state" "state" ...
##   ..$ expected: chr [1:1666558] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
##   ..$ actual  : chr [1:1666558] "Burgenland" "Burgenland" "Burgenland" "Burgenland" ...
##   ..$ file    : chr [1:1666558] "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" ...
##  - attr(*, "na.action")= 'omit' Named int [1:894] 1 2 3 4 5 6 7 8 9 10 ...
##   ..- attr(*, "names")= chr [1:894] "1" "2" "3" "4" ...
range(data$cases)
## [1]    237 613076
range(data$deaths)
## [1]     1 25250
range(data$recovered)
## [1]      2 266287
data_cases_state = data

SEIR Model Parameters

model_out = read_csv("../datasets/model_out.csv")
str(model_out)
## tibble [73 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ state    : chr [1:73] "Arizona" "Arizona" "Arizona" "Arizona" ...
##  $ startdate: Date[1:73], format: "2020-04-16" "2020-05-01" ...
##  $ enddate  : Date[1:73], format: "2020-04-30" "2020-05-15" ...
##  $ k        : num [1:73] 0.783 0.863 0.865 0.949 0.986 ...
##  $ sigma    : num [1:73] 0.0714 0.0714 0.0714 0.0714 0.0714 ...
##  $ lamda    : num [1:73] 0.0563 0.07 0.1266 0.0784 0.0925 ...
##  $ c        : num [1:73] 1 0 0.127 1 0 ...
##  $ alpha    : num [1:73] 0.06825 0.00721 0.06765 0.10224 0.05641 ...
##  $ omega    : num [1:73] 0.00275 0.003091 0.001429 0.001371 0.000698 ...
##  $ miu      : num [1:73] 0.0384 0.0175 0.0338 0.036 0.014 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   state = col_character(),
##   ..   startdate = col_date(format = ""),
##   ..   enddate = col_date(format = ""),
##   ..   k = col_double(),
##   ..   sigma = col_double(),
##   ..   lamda = col_double(),
##   ..   c = col_double(),
##   ..   alpha = col_double(),
##   ..   omega = col_double(),
##   ..   miu = col_double()
##   .. )
data_parm = model_out

Combined Data of Interest (State-Level)

# Attach the observed counts on each fitting window's start date to the SEIR
# parameter table; windows without a matching (state, date) record are dropped.
data <- data_parm

n <- length(data$state)

# Look up every (state, startdate) pair in the case table in one pass.
# match() yields NA when there is no record and the first hit when a key is
# duplicated, and it also behaves correctly for an empty parameter table.
window_key <- paste(data$state, data$startdate, sep = "|")
record_key <- paste(data_cases_state$state, data_cases_state$date, sep = "|")
hit <- match(window_key, record_key)

data$cases     <- data_cases_state$cases[hit]
data$deaths    <- data_cases_state$deaths[hit]
data$recovered <- data_cases_state$recovered[hit]

# Unmatched windows carry NA counts and are removed here.
data <- na.omit(data)

# Build one row of state-level covariates for every row of `data` by averaging
# the county-level columns within each state.
# NOTE(review): this is an unweighted mean over counties, so count-like columns
# (e.g. PopulationEstimate2018, Hospitals) become a per-county average rather
# than a state total -- confirm this is the intended definition.
covariate_cols <- setdiff(colnames(data_demographic_county),
                          c("State", "CountyName"))

# Average each state once. Columns are selected by name rather than by
# position, so the result does not depend on where CountyName sits.
state_means <- aggregate(
  as.data.frame(data_demographic_county[, covariate_cols]),
  by = list(State = data_demographic_county$State),
  FUN = mean
)

# Expand to one row per row of `data`, in the same order. A state with no
# county records gets NA covariates (removed by the later na.omit()).
data_demographic_state <- state_means[match(data$state, state_means$State),
                                      c("State", covariate_cols)]
data_demographic_state$State <- data$state
rownames(data_demographic_state) <- NULL

# Append the state-level covariates, derive the elapsed-time predictor `days`
# and build the modeling table `data_unamed`.
data <- cbind(
  data,
  data_demographic_state[, colnames(data_demographic_state) != "State",
                         drop = FALSE]
)
data <- na.omit(data)

# Length of each fitting window in days, computed row by row (vectorised) so
# that windows of different lengths each keep their own value.
window_days <- as.numeric(data$enddate - data$startdate, units = "days")

# `days` is the running total of window lengths within each state, following
# the row order of `data`.
data$days <- ave(window_days, data$state, FUN = cumsum)

# Modeling table: everything except the state label and the window dates.
data_unamed <- data[, !(colnames(data) %in% c("state", "startdate", "enddate"))]
# head(data_demographic_county, 10)
# head(data_demographic_state, 10)
# head(data[, c("state", "startdate", "enddate", "days")], 10)
head(data, 10)
##         state  startdate    enddate      k   sigma   lamda       c   alpha
## 1     Arizona 2020-04-16 2020-04-30 0.7834 0.07143 0.05633 1.00000 0.06825
## 2     Arizona 2020-05-01 2020-05-15 0.8631 0.07143 0.07001 0.00000 0.00721
## 3     Arizona 2020-05-16 2020-05-30 0.8653 0.07143 0.12657 0.12709 0.06765
## 4     Arizona 2020-06-01 2020-06-15 0.9492 0.07143 0.07839 1.00000 0.10224
## 5     Arizona 2020-06-16 2020-06-30 0.9864 0.07143 0.09253 0.00000 0.05641
## 6     Arizona 2020-07-01 2020-07-15 0.9732 0.07143 0.06093 0.00000 0.00000
## 7  California 2020-04-16 2020-04-30 0.8574 0.07143 0.07211 0.07752 0.02728
## 8  California 2020-05-01 2020-05-15 0.9481 0.07143 0.02832 1.00000 0.01925
## 9  California 2020-05-16 2020-05-30 0.9598 0.07143 0.04972 0.40540 0.03497
## 10 California 2020-06-01 2020-06-15 1.1270 0.07143 0.10206 0.01176 0.03073
##        omega      miu  cases deaths recovered POP_LATITUDE POP_LONGITUDE
## 1  0.0027505 0.038429   4234    150       460        33.58        -111.5
## 2  0.0030913 0.017520   7962    330      1528        33.58        -111.5
## 3  0.0014293 0.033751  13631    679      3357        33.58        -111.5
## 4  0.0013712 0.036006  20123    917      4869        33.58        -111.5
## 5  0.0006979 0.013973  39097   1219      6598        33.58        -111.5
## 6  0.0000000 0.003805  84092   1720      9715        33.58        -111.5
## 7  0.0023628 0.007923  28035    970      1753        37.82        -120.9
## 8  0.0013106 0.006909  52152   2131      5130        37.82        -120.9
## 9  0.0009157 0.012128  78704   3207      9098        37.82        -120.9
## 10 0.0006310 0.012709 115032   4219     17585        37.82        -120.9
##    PopulationEstimate2018 PopTotalMale2017 PopulationEstimate_above65_2017
## 1                  478110           232553                           80116
## 2                  478110           232553                           80116
## 3                  478110           232553                           80116
## 4                  478110           232553                           80116
## 5                  478110           232553                           80116
## 6                  478110           232553                           80116
## 7                  682018           338751                           94920
## 8                  682018           338751                           94920
## 9                  682018           338751                           94920
## 10                 682018           338751                           94920
##    PopulationDensityperSqMile2010 DiabetesPercentage Smokers_Percentage
## 1                           52.05             10.060              16.48
## 2                           52.05             10.060              16.48
## 3                           52.05             10.060              16.48
## 4                           52.05             10.060              16.48
## 5                           52.05             10.060              16.48
## 6                           52.05             10.060              16.48
## 7                          663.26              8.505              12.09
## 8                          663.26              8.505              12.09
## 9                          663.26              8.505              12.09
## 10                         663.26              8.505              12.09
##    HeartDiseaseMortality StrokeMortality Hospitals ICU_beds
## 1                  148.8           30.90     5.067    103.9
## 2                  148.8           30.90     5.067    103.9
## 3                  148.8           30.90     5.067    103.9
## 4                  148.8           30.90     5.067    103.9
## 5                  148.8           30.90     5.067    103.9
## 6                  148.8           30.90     5.067    103.9
## 7                  153.9           37.89     5.672    126.5
## 8                  153.9           37.89     5.672    126.5
## 9                  153.9           37.89     5.672    126.5
## 10                 153.9           37.89     5.672    126.5
##    HospParticipatinginNetwork2017 stay_at_home above_50_gatherings
## 1                           1.800           12                   2
## 2                           1.800           12                   2
## 3                           1.800           12                   2
## 4                           1.800           12                   2
## 5                           1.800           12                   2
## 6                           1.800           12                   2
## 7                           1.845            0                   4
## 8                           1.845            0                   4
## 9                           1.845            0                   4
## 10                          1.845            0                   4
##    above_500_gatherings restaurant_dine_in entertainment_gym days
## 1                     6                  7                 7   14
## 2                     6                  7                 7   28
## 3                     6                  7                 7   42
## 4                     6                  7                 7   56
## 5                     6                  7                 7   70
## 6                     6                  7                 7   84
## 7                     8                  3                 3   14
## 8                     8                  3                 3   28
## 9                     8                  3                 3   42
## 10                    8                  3                 3   56

Methods: Modeling & Testing Procedures (abridged)

Data Points pairs() Plot

# Reproducible 90/10 split of the modeling table into training and test rows.
set.seed(42)
num_obs <- nrow(data_unamed)      # total number of observations
num_trn <- round(0.90 * num_obs)  # number of observations used for training

# Row indices of the training set, drawn without replacement.
trn_idx <- sample.int(num_obs, size = num_trn)
data_trn <- data_unamed[trn_idx, ]   # training data
data_tst <- data_unamed[-trn_idx, ]  # testing data (all remaining rows)

Modeling k

# full additive model
# Regress k on every other column of data_trn. The summary below reports 9
# coefficients as "not defined because of singularities", which is also why
# predict() warns about a rank-deficient fit.
# test_mod() is defined outside this excerpt; it prints the metrics row shown
# below (loocv_rmse, adj_r2, ..., perc_err).
mod_k_full = lm(k ~ ., data = data_trn)
test_mod(mod_k_full, k = 1)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.221852   0.475015   0.016221   0.000191  29.000000   0.149099  14.731106
summary(mod_k_full)
## 
## Call:
## lm(formula = k ~ ., data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4647 -0.0452  0.0073  0.0503  0.2077 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.62e+01   3.93e+00    4.13  0.00019 ***
## sigma                                  NA         NA      NA       NA    
## lamda                           -6.46e+00   1.20e+00   -5.41  3.5e-06 ***
## c                               -3.05e-01   7.17e-02   -4.26  0.00012 ***
## alpha                            3.08e+00   1.02e+00    3.00  0.00465 ** 
## omega                            2.32e+01   2.59e+01    0.89  0.37630    
## miu                              4.41e+00   1.99e+00    2.21  0.03272 *  
## cases                           -4.57e-07   1.64e-06   -0.28  0.78232    
## deaths                           1.32e-05   1.91e-05    0.69  0.49356    
## recovered                       -1.54e-06   6.60e-06   -0.23  0.81645    
## POP_LATITUDE                    -2.01e-01   5.03e-02   -3.99  0.00028 ***
## POP_LONGITUDE                    6.94e-02   1.69e-02    4.11  0.00019 ***
## PopulationEstimate2018          -1.13e-04   2.90e-05   -3.91  0.00036 ***
## PopTotalMale2017                 2.15e-04   5.58e-05    3.85  0.00043 ***
## PopulationEstimate_above65_2017  7.20e-05   1.76e-05    4.10  0.00020 ***
## PopulationDensityperSqMile2010   2.45e-05   1.41e-04    0.17  0.86330    
## DiabetesPercentage              -8.36e-01   2.00e-01   -4.19  0.00015 ***
## Smokers_Percentage               3.32e-01   8.49e-02    3.91  0.00036 ***
## HeartDiseaseMortality           -2.90e-02   9.27e-03   -3.13  0.00331 ** 
## StrokeMortality                  1.61e-01   4.45e-02    3.62  0.00083 ***
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                             3.16e-03   1.46e-03    2.17  0.03613 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.136 on 39 degrees of freedom
## Multiple R-squared:  0.647,  Adjusted R-squared:  0.475 
## F-statistic: 3.76 on 19 and 39 DF,  p-value: 0.000227
# small additive model
# k explained by the elapsed-time predictor `days` alone.
mod_k_1 = lm(k ~ days, data = data_trn)
# large additive model
# The full model with the nine terms that lm() reported as NA (singular)
# removed explicitly; the summary below shows the same estimates as
# mod_k_full.
mod_k_2 = lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma, data = data_trn)

test_mod(mod_k_1, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  1.867e-01  2.328e-02  9.083e-01  4.534e-13  2.000e+00  5.830e-02  5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.221852   0.475015   0.016221   0.000191  20.000000   0.149099  14.731106
summary(mod_k_1)
## 
## Call:
## lm(formula = k ~ days, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2560 -0.0326  0.0395  0.0661  0.2381 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.805555   0.054607   14.75   <2e-16 ***
## days        0.001488   0.000964    1.54     0.13    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.186 on 57 degrees of freedom
## Multiple R-squared:  0.0401, Adjusted R-squared:  0.0233 
## F-statistic: 2.38 on 1 and 57 DF,  p-value: 0.128
summary(mod_k_2)
## 
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4647 -0.0452  0.0073  0.0503  0.2077 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.62e+01   3.93e+00    4.13  0.00019 ***
## lamda                           -6.46e+00   1.20e+00   -5.41  3.5e-06 ***
## c                               -3.05e-01   7.17e-02   -4.26  0.00012 ***
## alpha                            3.08e+00   1.02e+00    3.00  0.00465 ** 
## omega                            2.32e+01   2.59e+01    0.89  0.37630    
## miu                              4.41e+00   1.99e+00    2.21  0.03272 *  
## cases                           -4.57e-07   1.64e-06   -0.28  0.78232    
## deaths                           1.32e-05   1.91e-05    0.69  0.49356    
## recovered                       -1.54e-06   6.60e-06   -0.23  0.81645    
## POP_LATITUDE                    -2.01e-01   5.03e-02   -3.99  0.00028 ***
## POP_LONGITUDE                    6.94e-02   1.69e-02    4.11  0.00019 ***
## PopulationEstimate2018          -1.13e-04   2.90e-05   -3.91  0.00036 ***
## PopTotalMale2017                 2.15e-04   5.58e-05    3.85  0.00043 ***
## PopulationEstimate_above65_2017  7.20e-05   1.76e-05    4.10  0.00020 ***
## PopulationDensityperSqMile2010   2.45e-05   1.41e-04    0.17  0.86330    
## DiabetesPercentage              -8.36e-01   2.00e-01   -4.19  0.00015 ***
## Smokers_Percentage               3.32e-01   8.49e-02    3.91  0.00036 ***
## HeartDiseaseMortality           -2.90e-02   9.27e-03   -3.13  0.00331 ** 
## StrokeMortality                  1.61e-01   4.45e-02    3.62  0.00083 ***
## days                             3.16e-03   1.46e-03    2.17  0.03613 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.136 on 39 degrees of freedom
## Multiple R-squared:  0.647,  Adjusted R-squared:  0.475 
## F-statistic: 3.76 on 19 and 39 DF,  p-value: 0.000227
# intermediate models
# mod_k_3: the large model without the case counts (cases, deaths, recovered),
# omega, population density and the two population totals; `days` enters as
# log(days), stay_at_home is added back in, and latitude/longitude also get
# quadratic terms.
mod_k_3 <- lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma
              - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
              + log(days) - days
              + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
              + I(POP_LATITUDE ^ 2) + I(POP_LONGITUDE ^ 2),
              data = data_trn)
# mod_k_4: the large model without the case counts, omega and population
# density, with `days` replaced by log(days); the population totals stay in
# and there are no policy or quadratic terms.
mod_k_4 <- lm(k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - stay_at_home - above_50_gatherings - above_500_gatherings - restaurant_dine_in - entertainment_gym - sigma
              - cases - deaths - omega - recovered - PopulationDensityperSqMile2010 +
                log(days) - days,
              data = data_trn)

test_mod(mod_k_1, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  1.867e-01  2.328e-02  9.083e-01  4.534e-13  2.000e+00  5.830e-02  5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.221852   0.475015   0.016221   0.000191  20.000000   0.149099  14.731106
test_mod(mod_k_3, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.2054978  0.5247754  0.0017914  0.0001737 16.0000000  0.1552475 14.6026990
test_mod(mod_k_4, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.2042750  0.5311550  0.0009837  0.0004907 15.0000000  0.1546657 14.4151641
summary(mod_k_3)
## 
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4742 -0.0548  0.0072  0.0526  0.2431 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.13e+03   4.10e+02    2.75   0.0087 ** 
## lamda                           -6.79e+00   1.13e+00   -6.02  3.4e-07 ***
## c                               -3.12e-01   6.69e-02   -4.67  2.9e-05 ***
## alpha                            3.83e+00   8.45e-01    4.54  4.5e-05 ***
## miu                              4.58e+00   1.71e+00    2.69   0.0102 *  
## POP_LATITUDE                    -2.11e+01   7.52e+00   -2.80   0.0076 ** 
## POP_LONGITUDE                    1.51e+01   5.58e+00    2.70   0.0098 ** 
## PopulationEstimate_above65_2017 -1.64e-04   6.58e-05   -2.50   0.0163 *  
## DiabetesPercentage              -8.55e+00   3.03e+00   -2.82   0.0072 ** 
## Smokers_Percentage               4.53e+00   1.62e+00    2.80   0.0077 ** 
## HeartDiseaseMortality           -8.73e-02   2.73e-02   -3.20   0.0026 ** 
## StrokeMortality                  4.52e-01   1.34e-01    3.38   0.0016 ** 
## log(days)                        1.02e-01   2.99e-02    3.41   0.0014 ** 
## stay_at_home                     1.14e+00   4.42e-01    2.59   0.0129 *  
## I(POP_LATITUDE^2)                2.65e-01   9.49e-02    2.79   0.0078 ** 
## I(POP_LONGITUDE^2)               7.85e-02   2.91e-02    2.70   0.0100 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.13 on 43 degrees of freedom
## Multiple R-squared:  0.648,  Adjusted R-squared:  0.525 
## F-statistic: 5.27 on 15 and 43 DF,  p-value: 8.7e-06
summary(mod_k_4)
## 
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4732 -0.0497 -0.0017  0.0607  0.2366 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     15.3989580  3.5627641    4.32  8.7e-05 ***
## lamda                           -6.8317626  1.1179898   -6.11  2.3e-07 ***
## c                               -0.3221741  0.0646726   -4.98  1.0e-05 ***
## alpha                            3.6975440  0.8116795    4.56  4.1e-05 ***
## miu                              4.9379779  1.6006925    3.08  0.00351 ** 
## POP_LATITUDE                    -0.1920080  0.0440854   -4.36  7.8e-05 ***
## POP_LONGITUDE                    0.0662907  0.0153167    4.33  8.5e-05 ***
## PopulationEstimate2018          -0.0001105  0.0000270   -4.09  0.00018 ***
## PopTotalMale2017                 0.0002107  0.0000518    4.06  0.00020 ***
## PopulationEstimate_above65_2017  0.0000669  0.0000154    4.35  8.1e-05 ***
## DiabetesPercentage              -0.7787347  0.1646073   -4.73  2.3e-05 ***
## Smokers_Percentage               0.3062246  0.0605033    5.06  7.9e-06 ***
## HeartDiseaseMortality           -0.0257963  0.0058214   -4.43  6.1e-05 ***
## StrokeMortality                  0.1439159  0.0288281    4.99  9.9e-06 ***
## log(days)                        0.0993978  0.0294363    3.38  0.00154 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.129 on 44 degrees of freedom
## Multiple R-squared:  0.644,  Adjusted R-squared:  0.531 
## F-statistic: 5.69 on 14 and 44 DF,  p-value: 4.16e-06
# Two contrasting reference fits for k (both turn out relatively poor):
#   mod_k_5 - the large additive model with the other fitted parameters
#             (lamda, c, alpha, omega, miu) removed from the predictors;
#   mod_k_6 - the mirror image: only those five parameters as predictors.
mod_k_5 = lm(
  k ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (lamda + c + alpha + omega + miu),
  data = data_trn
)
mod_k_6 = lm(
  k ~ lamda + c + alpha + omega + miu,
  data = data_trn
)

# Re-run the evaluation helper on each candidate so the metrics line up.
test_mod(mod_k_1, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  1.867e-01  2.328e-02  9.083e-01  4.534e-13  2.000e+00  5.830e-02  5.580e+00
test_mod(mod_k_2, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.221852   0.475015   0.016221   0.000191  20.000000   0.149099  14.731106
test_mod(mod_k_3, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.2054978  0.5247754  0.0017914  0.0001737 16.0000000  0.1552475 14.6026990
test_mod(mod_k_4, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.2042750  0.5311550  0.0009837  0.0004907 15.0000000  0.1546657 14.4151641
test_mod(mod_k_5, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.004e-01  1.101e-01  2.567e-01  2.605e-12  1.500e+01  9.543e-02  7.160e+00
test_mod(mod_k_6, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.135e-01  8.328e-02  1.932e-02  7.076e-10  6.000e+00  5.237e-02  5.250e+00
summary(mod_k_5)
## 
## Call:
## lm(formula = k ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (lamda + 
##     c + alpha + omega + miu), data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9987 -0.0268  0.0054  0.0370  0.2963 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      8.44e+00   4.21e+00    2.01    0.051 .
## cases                            5.03e-07   1.92e-06    0.26    0.795  
## deaths                           1.45e-05   2.31e-05    0.63    0.533  
## recovered                       -5.29e-06   7.18e-06   -0.74    0.465  
## POP_LATITUDE                    -9.83e-02   5.46e-02   -1.80    0.079 .
## POP_LONGITUDE                    3.81e-02   1.82e-02    2.10    0.042 *
## PopulationEstimate2018          -5.57e-05   2.94e-05   -1.90    0.064 .
## PopTotalMale2017                 1.06e-04   5.64e-05    1.88    0.067 .
## PopulationEstimate_above65_2017  3.67e-05   2.00e-05    1.83    0.073 .
## PopulationDensityperSqMile2010  -2.84e-05   1.52e-04   -0.19    0.852  
## DiabetesPercentage              -4.58e-01   2.28e-01   -2.01    0.051 .
## Smokers_Percentage               1.93e-01   9.88e-02    1.96    0.057 .
## HeartDiseaseMortality           -1.62e-02   1.09e-02   -1.48    0.147  
## StrokeMortality                  9.04e-02   5.25e-02    1.72    0.092 .
## days                             2.01e-03   1.60e-03    1.25    0.216  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.177 on 44 degrees of freedom
## Multiple R-squared:  0.325,  Adjusted R-squared:  0.11 
## F-statistic: 1.51 on 14 and 44 DF,  p-value: 0.147
summary(mod_k_6)
## 
## Call:
## lm(formula = k ~ lamda + c + alpha + omega + miu, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0289 -0.0445  0.0356  0.0886  0.2567 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.1002     0.0791   13.90   <2e-16 ***
## lamda        -2.7175     1.2218   -2.22    0.030 *  
## c            -0.1608     0.0772   -2.08    0.042 *  
## alpha         2.0142     1.1260    1.79    0.079 .  
## omega       -36.1277    24.4670   -1.48    0.146    
## miu           0.8043     1.0704    0.75    0.456    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.18 on 53 degrees of freedom
## Multiple R-squared:  0.162,  Adjusted R-squared:  0.0833 
## F-statistic: 2.05 on 5 and 53 DF,  p-value: 0.0859
# Final model retained for k.
# NOTE(review): mod_k_6 shows the smallest test_rmse and perc_err in the
# test_mod() output above, but also a low adjusted R-squared; presumably
# it was picked for hold-out accuracy -- confirm the selection criterion.
mod_k = mod_k_6
# diagnostics(mod_k, testit = FALSE)

Modeling sigma

In this study, sigma is fixed at an empirical value, so it is excluded from modeling.

Modeling lamda

# full additive model
# Regress lamda on every other column of data_trn. The summary below reports
# coefficients "not defined because of singularities", i.e. this fit is
# rank-deficient, which is why predict.lm() warns when test_mod() is called.
mod_lamda_full = lm(lamda ~ ., data = data_trn)
# NOTE(review): k = 3 presumably identifies lamda as the response for
# test_mod() (k = 1 is used for the k models) -- confirm against its definition.
test_mod(mod_lamda_full, k = 3)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02250    0.79955    0.17494    0.03059   29.00000    0.01670   26.37237
summary(mod_lamda_full)
## 
## Call:
## lm(formula = lamda ~ ., data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03239 -0.00625 -0.00057  0.00673  0.02290 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.64e+00   3.98e-01    4.13  0.00018 ***
## k                               -6.63e-02   1.23e-02   -5.41  3.5e-06 ***
## sigma                                  NA         NA      NA       NA    
## c                               -4.39e-02   5.28e-03   -8.31  3.7e-10 ***
## alpha                            3.96e-01   9.62e-02    4.11  0.00020 ***
## omega                            4.98e-01   2.65e+00    0.19  0.85212    
## miu                              7.71e-01   1.75e-01    4.41  7.8e-05 ***
## cases                           -7.34e-08   1.66e-07   -0.44  0.66132    
## deaths                           4.80e-07   1.94e-06    0.25  0.80609    
## recovered                        1.10e-07   6.69e-07    0.16  0.86987    
## POP_LATITUDE                    -2.08e-02   5.06e-03   -4.11  0.00020 ***
## POP_LONGITUDE                    6.72e-03   1.74e-03    3.86  0.00042 ***
## PopulationEstimate2018          -1.17e-05   2.91e-06   -4.04  0.00025 ***
## PopTotalMale2017                 2.23e-05   5.60e-06    3.98  0.00029 ***
## PopulationEstimate_above65_2017  7.26e-06   1.78e-06    4.08  0.00022 ***
## PopulationDensityperSqMile2010   2.75e-06   1.43e-05    0.19  0.84842    
## DiabetesPercentage              -7.94e-02   2.08e-02   -3.83  0.00046 ***
## Smokers_Percentage               3.01e-02   8.93e-03    3.37  0.00170 ** 
## HeartDiseaseMortality           -2.66e-03   9.61e-04   -2.77  0.00854 ** 
## StrokeMortality                  1.48e-02   4.65e-03    3.17  0.00292 ** 
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                             1.95e-04   1.53e-04    1.28  0.20967    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0138 on 39 degrees of freedom
## Multiple R-squared:  0.865,  Adjusted R-squared:   0.8 
## F-statistic: 13.2 on 19 and 39 DF,  p-value: 1.57e-11
# Baseline: lamda explained by the elapsed days alone.
mod_lamda_1 = lm(lamda ~ days, data = data_trn)

# Large additive model: every column except sigma and the eight
# hospital-capacity / restriction columns, i.e. the nine terms whose
# coefficients come back NA in the full additive fit.
mod_lamda_2 = lm(
  lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)

test_mod(mod_lamda_1, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.031384  -0.003521   0.231445   0.727519   2.000000   0.024319  39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02250    0.79955    0.17494    0.03059   20.00000    0.01670   26.37237
summary(mod_lamda_1)
## 
## Call:
## lm(formula = lamda ~ days, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.0627 -0.0235  0.0031  0.0221  0.0679 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.075259   0.009075    8.29  2.2e-11 ***
## days        -0.000143   0.000160   -0.89     0.38    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0309 on 57 degrees of freedom
## Multiple R-squared:  0.0138, Adjusted R-squared:  -0.00352 
## F-statistic: 0.797 on 1 and 57 DF,  p-value: 0.376
summary(mod_lamda_2)
## 
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03239 -0.00625 -0.00057  0.00673  0.02290 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.64e+00   3.98e-01    4.13  0.00018 ***
## k                               -6.63e-02   1.23e-02   -5.41  3.5e-06 ***
## c                               -4.39e-02   5.28e-03   -8.31  3.7e-10 ***
## alpha                            3.96e-01   9.62e-02    4.11  0.00020 ***
## omega                            4.98e-01   2.65e+00    0.19  0.85212    
## miu                              7.71e-01   1.75e-01    4.41  7.8e-05 ***
## cases                           -7.34e-08   1.66e-07   -0.44  0.66132    
## deaths                           4.80e-07   1.94e-06    0.25  0.80609    
## recovered                        1.10e-07   6.69e-07    0.16  0.86987    
## POP_LATITUDE                    -2.08e-02   5.06e-03   -4.11  0.00020 ***
## POP_LONGITUDE                    6.72e-03   1.74e-03    3.86  0.00042 ***
## PopulationEstimate2018          -1.17e-05   2.91e-06   -4.04  0.00025 ***
## PopTotalMale2017                 2.23e-05   5.60e-06    3.98  0.00029 ***
## PopulationEstimate_above65_2017  7.26e-06   1.78e-06    4.08  0.00022 ***
## PopulationDensityperSqMile2010   2.75e-06   1.43e-05    0.19  0.84842    
## DiabetesPercentage              -7.94e-02   2.08e-02   -3.83  0.00046 ***
## Smokers_Percentage               3.01e-02   8.93e-03    3.37  0.00170 ** 
## HeartDiseaseMortality           -2.66e-03   9.61e-04   -2.77  0.00854 ** 
## StrokeMortality                  1.48e-02   4.65e-03    3.17  0.00292 ** 
## days                             1.95e-04   1.53e-04    1.28  0.20967    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0138 on 39 degrees of freedom
## Multiple R-squared:  0.865,  Adjusted R-squared:   0.8 
## F-statistic: 13.2 on 19 and 39 DF,  p-value: 1.57e-11
# Intermediate models for lamda.
#
# Both start from the large additive model (sigma and the hospital /
# restriction columns removed), additionally drop cases, deaths, omega,
# recovered and population density, and replace days by log(days).
#
# mod_lamda_3 further drops the two population totals, re-adds stay_at_home
# (it is subtracted with the restriction columns first, then added back) and
# adds quadratic latitude / longitude terms.
# The original formula contained a stray doubled operator
# ("+  + I(POP_LONGITUDE ^ 2)"); the redundant unary plus is removed here.
# The set of model terms is unchanged.
mod_lamda_3 = lm(
  lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE^2) + I(POP_LONGITUDE^2),
  data = data_trn
)

# mod_lamda_4 keeps the population totals and has no quadratic terms.
mod_lamda_4 = lm(
  lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

test_mod(mod_lamda_1, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.031384  -0.003521   0.231445   0.727519   2.000000   0.024319  39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02250    0.79955    0.17494    0.03059   20.00000    0.01670   26.37237
test_mod(mod_lamda_3, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01787    0.82455    0.20647    0.01453   16.00000    0.01624   24.42386
test_mod(mod_lamda_4, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01745    0.82845    0.19372    0.01659   15.00000    0.01620   24.31619
summary(mod_lamda_3)
## 
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03394 -0.00630 -0.00201  0.00661  0.02268 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.38e+02   3.90e+01    3.53  0.00099 ***
## k                               -6.74e-02   1.12e-02   -6.02  3.4e-07 ***
## c                               -4.40e-02   4.67e-03   -9.43  4.9e-12 ***
## alpha                            4.21e-01   7.96e-02    5.29  3.9e-06 ***
## miu                              7.92e-01   1.38e-01    5.73  9.2e-07 ***
## POP_LATITUDE                    -2.57e+00   7.15e-01   -3.59  0.00084 ***
## POP_LONGITUDE                    1.85e+00   5.31e-01    3.49  0.00115 ** 
## PopulationEstimate_above65_2017 -2.07e-05   6.27e-06   -3.29  0.00199 ** 
## DiabetesPercentage              -1.04e+00   2.88e-01   -3.59  0.00084 ***
## Smokers_Percentage               5.50e-01   1.54e-01    3.57  0.00090 ***
## HeartDiseaseMortality           -1.02e-02   2.59e-03   -3.93  0.00031 ***
## StrokeMortality                  5.21e-02   1.27e-02    4.09  0.00018 ***
## log(days)                        7.81e-03   3.14e-03    2.49  0.01682 *  
## stay_at_home                     1.42e-01   4.21e-02    3.37  0.00158 ** 
## I(POP_LATITUDE^2)                3.22e-02   9.02e-03    3.58  0.00088 ***
## I(POP_LONGITUDE^2)               9.63e-03   2.77e-03    3.48  0.00117 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0129 on 43 degrees of freedom
## Multiple R-squared:  0.87,   Adjusted R-squared:  0.825 
## F-statistic: 19.2 on 15 and 43 DF,  p-value: 2.6e-14
summary(mod_lamda_4)
## 
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03373 -0.00652 -0.00214  0.00673  0.02291 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.63e+00   3.43e-01    4.74  2.3e-05 ***
## k                               -6.72e-02   1.10e-02   -6.11  2.3e-07 ***
## c                               -4.42e-02   4.46e-03   -9.92  8.6e-13 ***
## alpha                            4.18e-01   7.46e-02    5.59  1.3e-06 ***
## miu                              8.00e-01   1.27e-01    6.30  1.2e-07 ***
## POP_LATITUDE                    -2.07e-02   4.19e-03   -4.95  1.1e-05 ***
## POP_LONGITUDE                    6.72e-03   1.50e-03    4.46  5.5e-05 ***
## PopulationEstimate2018          -1.16e-05   2.61e-06   -4.46  5.6e-05 ***
## PopTotalMale2017                 2.21e-05   5.02e-06    4.41  6.6e-05 ***
## PopulationEstimate_above65_2017  7.13e-06   1.47e-06    4.84  1.6e-05 ***
## DiabetesPercentage              -7.93e-02   1.61e-02   -4.93  1.2e-05 ***
## Smokers_Percentage               2.99e-02   6.06e-03    4.93  1.2e-05 ***
## HeartDiseaseMortality           -2.60e-03   5.73e-04   -4.53  4.5e-05 ***
## StrokeMortality                  1.45e-02   2.83e-03    5.13  6.3e-06 ***
## log(days)                        7.73e-03   3.06e-03    2.53    0.015 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0128 on 44 degrees of freedom
## Multiple R-squared:  0.87,   Adjusted R-squared:  0.828 
## F-statistic:   21 on 14 and 44 DF,  p-value: 5.59e-15
# Two contrasting reference fits for lamda (relatively poor performers):
#   mod_lamda_5 - the large additive model with the other fitted parameters
#                 (k, c, alpha, omega, miu) removed from the predictors;
#   mod_lamda_6 - only those five parameters as predictors.
mod_lamda_5 = lm(
  lamda ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + c + alpha + omega + miu),
  data = data_trn
)
mod_lamda_6 = lm(
  lamda ~ k + c + alpha + omega + miu,
  data = data_trn
)

# Re-run the evaluation helper on each candidate so the metrics line up.
test_mod(mod_lamda_1, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.031384  -0.003521   0.231445   0.727519   2.000000   0.024319  39.801177
test_mod(mod_lamda_2, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02250    0.79955    0.17494    0.03059   20.00000    0.01670   26.37237
test_mod(mod_lamda_3, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01787    0.82455    0.20647    0.01453   16.00000    0.01624   24.42386
test_mod(mod_lamda_4, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01745    0.82845    0.19372    0.01659   15.00000    0.01620   24.31619
test_mod(mod_lamda_5, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.03121    0.22337    0.22277    0.23216   15.00000    0.02704   44.73045
test_mod(mod_lamda_6, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02534    0.60579    0.10363    0.07028    6.00000    0.01508   18.16319
summary(mod_lamda_5)
## 
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (k + c + 
##     alpha + omega + miu), data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03981 -0.01853 -0.00046  0.01614  0.05226 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                      1.57e-01   6.44e-01    0.24     0.81
## cases                           -4.08e-08   2.94e-07   -0.14     0.89
## deaths                          -4.05e-06   3.53e-06   -1.15     0.26
## recovered                        1.29e-06   1.10e-06    1.18     0.25
## POP_LATITUDE                    -3.72e-03   8.36e-03   -0.45     0.66
## POP_LONGITUDE                    1.75e-04   2.78e-03    0.06     0.95
## PopulationEstimate2018           2.38e-06   4.50e-06    0.53     0.60
## PopTotalMale2017                -5.04e-06   8.63e-06   -0.58     0.56
## PopulationEstimate_above65_2017  1.05e-06   3.06e-06    0.34     0.73
## PopulationDensityperSqMile2010   3.05e-05   2.32e-05    1.31     0.20
## DiabetesPercentage              -1.83e-02   3.49e-02   -0.52     0.60
## Smokers_Percentage               1.44e-02   1.51e-02    0.95     0.35
## HeartDiseaseMortality           -1.85e-03   1.68e-03   -1.11     0.27
## StrokeMortality                  8.20e-03   8.05e-03    1.02     0.31
## days                            -2.56e-04   2.45e-04   -1.05     0.30
## 
## Residual standard error: 0.0272 on 44 degrees of freedom
## Multiple R-squared:  0.411,  Adjusted R-squared:  0.223 
## F-statistic: 2.19 on 14 and 44 DF,  p-value: 0.0242
summary(mod_lamda_6)
## 
## Call:
## lm(formula = lamda ~ k + c + alpha + omega + miu, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05354 -0.01051  0.00503  0.01083  0.03496 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.08436    0.01422    5.93  2.3e-07 ***
## k           -0.03141    0.01412   -2.22  0.03042 *  
## c           -0.04153    0.00648   -6.41  3.9e-08 ***
## alpha        0.42203    0.11036    3.82  0.00035 ***
## omega       -2.19340    2.66718   -0.82  0.41455    
## miu          0.55378    0.08718    6.35  5.0e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0194 on 53 degrees of freedom
## Multiple R-squared:  0.64,   Adjusted R-squared:  0.606 
## F-statistic: 18.8 on 5 and 53 DF,  p-value: 1.03e-10
# Final model retained for lamda.
# NOTE(review): mod_lamda_4 shows the lowest loocv_rmse and the highest
# adjusted R-squared in the test_mod() output above, while mod_lamda_6 has the
# lower test_rmse -- presumably cross-validated fit was the criterion; confirm.
mod_lamda = mod_lamda_4
# diagnostics(mod_lamda, testit = FALSE)

Modeling c

# full additive model
# Regress c on every other column of data_trn. The summary below reports
# coefficients "not defined because of singularities", i.e. this fit is
# rank-deficient, which is why predict.lm() warns when test_mod() is called.
mod_c_full = lm(c ~ ., data = data_trn)
# NOTE(review): k = 4 presumably identifies c as the response for test_mod()
# (k = 3 is used for the lamda models) -- confirm against its definition.
test_mod(mod_c_full, k = 4)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3844     0.6300     0.1073     0.8302    29.0000     0.2652        Inf
summary(mod_c_full)
## 
## Call:
## lm(formula = c ~ ., data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4762 -0.1244 -0.0493  0.1559  0.4969 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.48e+01   7.73e+00    3.22  0.00262 ** 
## k                               -1.04e+00   2.44e-01   -4.26  0.00012 ***
## sigma                                  NA         NA      NA       NA    
## lamda                           -1.46e+01   1.75e+00   -8.31  3.7e-10 ***
## alpha                            4.62e+00   1.96e+00    2.35  0.02379 *  
## omega                            1.76e+01   4.82e+01    0.37  0.71646    
## miu                              1.51e+01   3.05e+00    4.96  1.4e-05 ***
## cases                           -2.66e-06   3.00e-06   -0.89  0.38107    
## deaths                           5.05e-05   3.45e-05    1.46  0.15120    
## recovered                       -4.43e-06   1.22e-05   -0.36  0.71804    
## POP_LATITUDE                    -2.96e-01   9.95e-02   -2.97  0.00508 ** 
## POP_LONGITUDE                    1.03e-01   3.35e-02    3.06  0.00395 ** 
## PopulationEstimate2018          -2.06e-04   5.37e-05   -3.83  0.00045 ***
## PopTotalMale2017                 3.94e-04   1.03e-04    3.82  0.00047 ***
## PopulationEstimate_above65_2017  1.12e-04   3.43e-05    3.28  0.00220 ** 
## PopulationDensityperSqMile2010  -2.93e-04   2.56e-04   -1.14  0.25923    
## DiabetesPercentage              -1.07e+00   4.09e-01   -2.62  0.01258 *  
## Smokers_Percentage               3.28e-01   1.77e-01    1.85  0.07137 .  
## HeartDiseaseMortality           -2.41e-02   1.87e-02   -1.29  0.20631    
## StrokeMortality                  1.59e-01   9.14e-02    1.74  0.09055 .  
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                             4.81e-03   2.74e-03    1.76  0.08674 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.251 on 39 degrees of freedom
## Multiple R-squared:  0.751,  Adjusted R-squared:  0.63 
## F-statistic:  6.2 on 19 and 39 DF,  p-value: 7.93e-07
# Baseline: c explained by the elapsed days alone.
mod_c_1 = lm(c ~ days, data = data_trn)

# Large additive model: every column except sigma and the eight
# hospital-capacity / restriction columns, i.e. the nine terms whose
# coefficients come back NA in the full additive fit.
mod_c_2 = lm(
  c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)

test_mod(mod_c_1, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  4.204e-01 -6.359e-04  8.737e-01  1.839e-07  2.000e+00  4.396e-01        Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3844     0.6300     0.1073     0.8302    20.0000     0.2652        Inf
summary(mod_c_1)
## 
## Call:
## lm(formula = c ~ days, data = data_trn)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.434 -0.295 -0.198  0.450  0.734 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.23630    0.12145    1.95    0.057 .
## days         0.00210    0.00214    0.98    0.331  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.414 on 57 degrees of freedom
## Multiple R-squared:  0.0166, Adjusted R-squared:  -0.000636 
## F-statistic: 0.963 on 1 and 57 DF,  p-value: 0.331
summary(mod_c_2)
## 
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4762 -0.1244 -0.0493  0.1559  0.4969 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.48e+01   7.73e+00    3.22  0.00262 ** 
## k                               -1.04e+00   2.44e-01   -4.26  0.00012 ***
## lamda                           -1.46e+01   1.75e+00   -8.31  3.7e-10 ***
## alpha                            4.62e+00   1.96e+00    2.35  0.02379 *  
## omega                            1.76e+01   4.82e+01    0.37  0.71646    
## miu                              1.51e+01   3.05e+00    4.96  1.4e-05 ***
## cases                           -2.66e-06   3.00e-06   -0.89  0.38107    
## deaths                           5.05e-05   3.45e-05    1.46  0.15120    
## recovered                       -4.43e-06   1.22e-05   -0.36  0.71804    
## POP_LATITUDE                    -2.96e-01   9.95e-02   -2.97  0.00508 ** 
## POP_LONGITUDE                    1.03e-01   3.35e-02    3.06  0.00395 ** 
## PopulationEstimate2018          -2.06e-04   5.37e-05   -3.83  0.00045 ***
## PopTotalMale2017                 3.94e-04   1.03e-04    3.82  0.00047 ***
## PopulationEstimate_above65_2017  1.12e-04   3.43e-05    3.28  0.00220 ** 
## PopulationDensityperSqMile2010  -2.93e-04   2.56e-04   -1.14  0.25923    
## DiabetesPercentage              -1.07e+00   4.09e-01   -2.62  0.01258 *  
## Smokers_Percentage               3.28e-01   1.77e-01    1.85  0.07137 .  
## HeartDiseaseMortality           -2.41e-02   1.87e-02   -1.29  0.20631    
## StrokeMortality                  1.59e-01   9.14e-02    1.74  0.09055 .  
## days                             4.81e-03   2.74e-03    1.76  0.08674 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.251 on 39 degrees of freedom
## Multiple R-squared:  0.751,  Adjusted R-squared:  0.63 
## F-statistic:  6.2 on 19 and 39 DF,  p-value: 7.93e-07
# Intermediate models for c.
#
# Both start from the large additive model (sigma and the hospital /
# restriction columns removed), additionally drop cases, deaths, omega,
# recovered and population density, and replace days by log(days).
#
# mod_c_3 further drops the two population totals, re-adds stay_at_home
# (it is subtracted with the restriction columns first, then added back) and
# adds quadratic latitude / longitude terms.
# The original formula contained a stray doubled operator
# ("+  + I(POP_LONGITUDE ^ 2)"); the redundant unary plus is removed here.
# The set of model terms is unchanged.
mod_c_3 = lm(
  c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE^2) + I(POP_LONGITUDE^2),
  data = data_trn
)

# mod_c_4 keeps the population totals and has no quadratic terms.
mod_c_4 = lm(
  c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

test_mod(mod_c_1, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  4.204e-01 -6.359e-04  8.737e-01  1.839e-07  2.000e+00  4.396e-01        Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3844     0.6300     0.1073     0.8302    20.0000     0.2652        Inf
test_mod(mod_c_3, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3285     0.6605     0.2078     0.1439    16.0000     0.2592        Inf
test_mod(mod_c_4, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3132     0.6627     0.2233     0.1210    15.0000     0.2645        Inf
summary(mod_c_3)
## 
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5448 -0.1371 -0.0494  0.1446  0.4962 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.85e+03   7.01e+02    4.07  0.00020 ***
## k                               -1.08e+00   2.31e-01   -4.67  2.9e-05 ***
## lamda                           -1.53e+01   1.62e+00   -9.43  4.9e-12 ***
## alpha                            5.92e+00   1.68e+00    3.52  0.00103 ** 
## miu                              1.52e+01   2.52e+00    6.01  3.5e-07 ***
## POP_LATITUDE                    -5.29e+01   1.29e+01   -4.10  0.00018 ***
## POP_LONGITUDE                    3.86e+01   9.54e+00    4.04  0.00021 ***
## PopulationEstimate_above65_2017 -4.41e-04   1.12e-04   -3.93  0.00030 ***
## DiabetesPercentage              -2.13e+01   5.20e+00   -4.10  0.00018 ***
## Smokers_Percentage               1.14e+01   2.78e+00    4.09  0.00019 ***
## HeartDiseaseMortality           -2.00e-01   4.74e-02   -4.22  0.00013 ***
## StrokeMortality                  1.01e+00   2.33e-01    4.33  8.7e-05 ***
## log(days)                        1.40e-01   5.88e-02    2.39  0.02155 *  
## stay_at_home                     3.00e+00   7.54e-01    3.97  0.00027 ***
## I(POP_LATITUDE^2)                6.65e-01   1.62e-01    4.09  0.00018 ***
## I(POP_LONGITUDE^2)               2.01e-01   4.97e-02    4.04  0.00022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.241 on 43 degrees of freedom
## Multiple R-squared:  0.748,  Adjusted R-squared:  0.661 
## F-statistic: 8.52 on 15 and 43 DF,  p-value: 1.51e-08
# Coefficient table and fit statistics for the second intermediate model
# (no stay_at_home or squared-coordinate terms).
summary(mod_c_4)
## 
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5776 -0.1298 -0.0518  0.1315  0.5565 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.83e+01   6.68e+00    4.23  0.00012 ***
## k                               -1.12e+00   2.25e-01   -4.98  1.0e-05 ***
## lamda                           -1.56e+01   1.58e+00   -9.92  8.6e-13 ***
## alpha                            6.45e+00   1.56e+00    4.14  0.00015 ***
## miu                              1.46e+01   2.44e+00    6.00  3.3e-07 ***
## POP_LATITUDE                    -3.61e-01   8.18e-02   -4.42  6.4e-05 ***
## POP_LONGITUDE                    1.15e-01   2.93e-02    3.93  0.00029 ***
## PopulationEstimate2018          -2.08e-04   5.01e-05   -4.15  0.00015 ***
## PopTotalMale2017                 3.96e-04   9.64e-05    4.10  0.00017 ***
## PopulationEstimate_above65_2017  1.25e-04   2.86e-05    4.39  7.1e-05 ***
## DiabetesPercentage              -1.36e+00   3.16e-01   -4.31  9.2e-05 ***
## Smokers_Percentage               4.92e-01   1.21e-01    4.07  0.00019 ***
## HeartDiseaseMortality           -4.29e-02   1.13e-02   -3.78  0.00047 ***
## StrokeMortality                  2.43e-01   5.64e-02    4.32  8.8e-05 ***
## log(days)                        1.52e-01   5.72e-02    2.65  0.01113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.24 on 44 degrees of freedom
## Multiple R-squared:  0.744,  Adjusted R-squared:  0.663 
## F-statistic: 9.14 on 14 and 44 DF,  p-value: 6.67e-09
# Two restricted reference models for c (both turn out relatively bad).
# mod_c_5: every column except the hospital, policy and sigma columns, and
# also without the other parameter columns k, lamda, alpha, omega, miu.
mod_c_5 = lm(
  c ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + alpha + omega + miu),
  data = data_trn
)
# mod_c_6: the complement -- only those five parameter columns as predictors.
mod_c_6 = lm(
  c ~ k + lamda + alpha + omega + miu,
  data = data_trn
)

# Score all six candidate models for c side by side (rows for mod_c_1..mod_c_4
# repeat the earlier comparison so the two restricted models can be read
# against them).
# NOTE(review): perc_err is Inf for every model -- check its computation in
# test_mod (defined elsewhere).
test_mod(mod_c_1, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  4.204e-01 -6.359e-04  8.737e-01  1.839e-07  2.000e+00  4.396e-01        Inf
test_mod(mod_c_2, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3844     0.6300     0.1073     0.8302    20.0000     0.2652        Inf
test_mod(mod_c_3, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3285     0.6605     0.2078     0.1439    16.0000     0.2592        Inf
test_mod(mod_c_4, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3132     0.6627     0.2233     0.1210    15.0000     0.2645        Inf
test_mod(mod_c_5, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.511125  -0.046239   0.065749   0.007058  15.000000   0.448106        Inf
test_mod(mod_c_6, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.359923   0.444061   0.003014   0.128642   6.000000   0.266542        Inf
# Coefficient table for the model without the other parameter columns.
summary(mod_c_5)
## 
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (k + lamda + 
##     alpha + omega + miu), data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5860 -0.2928 -0.0346  0.2441  0.6879 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     -8.59e+00   1.00e+01   -0.86    0.396  
## cases                           -9.08e-06   4.58e-06   -1.98    0.054 .
## deaths                           1.07e-04   5.50e-05    1.95    0.057 .
## recovered                        8.43e-06   1.71e-05    0.49    0.624  
## POP_LATITUDE                     1.40e-01   1.30e-01    1.08    0.287  
## POP_LONGITUDE                   -3.45e-02   4.33e-02   -0.80    0.430  
## PopulationEstimate2018          -2.53e-06   6.99e-05   -0.04    0.971  
## PopTotalMale2017                 7.75e-06   1.34e-04    0.06    0.954  
## PopulationEstimate_above65_2017 -1.82e-05   4.76e-05   -0.38    0.704  
## PopulationDensityperSqMile2010  -5.00e-04   3.61e-04   -1.38    0.173  
## DiabetesPercentage               5.57e-01   5.43e-01    1.03    0.310  
## Smokers_Percentage              -3.40e-01   2.35e-01   -1.45    0.155  
## HeartDiseaseMortality            3.89e-02   2.61e-02    1.49    0.143  
## StrokeMortality                 -1.52e-01   1.25e-01   -1.22    0.231  
## days                             3.81e-03   3.81e-03    1.00    0.323  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.423 on 44 degrees of freedom
## Multiple R-squared:  0.206,  Adjusted R-squared:  -0.0462 
## F-statistic: 0.817 on 14 and 44 DF,  p-value: 0.647
# Coefficient table for the model that uses only k, lamda, alpha, omega, miu.
summary(mod_c_6)
## 
## Call:
## lm(formula = c ~ k + lamda + alpha + omega + miu, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8087 -0.1702 -0.0649  0.1658  0.5454 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.146      0.246    4.66  2.2e-05 ***
## k             -0.471      0.226   -2.08    0.042 *  
## lamda        -10.522      1.640   -6.41  3.9e-08 ***
## alpha          4.111      1.902    2.16    0.035 *  
## omega        -36.160     42.431   -0.85    0.398    
## miu            7.844      1.493    5.25  2.7e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.308 on 53 degrees of freedom
## Multiple R-squared:  0.492,  Adjusted R-squared:  0.444 
## F-statistic: 10.3 on 5 and 53 DF,  p-value: 6.44e-07
# Keep mod_c_3 as the working model for c. Among the six candidates scored by
# test_mod it has the lowest test_rmse (0.2592); mod_c_4 has the lower
# loocv_rmse (0.3132 vs 0.3285) with one parameter fewer.
# NOTE(review): the selection criterion is not stated -- presumably test RMSE;
# confirm.
mod_c = mod_c_3
# diagnostics(mod_c, testit = FALSE)

Modeling alpha

# full additive model
# Regress alpha on every other column of data_trn. The fit is rank-deficient:
# summary() reports 9 coefficients as not defined because of singularities,
# and predict.lm warns accordingly when test_mod scores the model.
mod_alpha_full = lm(alpha ~ ., data = data_trn)
test_mod(mod_alpha_full, k = 5)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02896    0.51252    0.02816    0.05511   29.00000    0.02461        Inf
# Coefficient table for the full model; the NA rows mark the aliased columns.
summary(mod_alpha_full)
## 
## Call:
## lm(formula = alpha ~ ., data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03333 -0.00934  0.00033  0.00861  0.04447 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.47e+00   6.20e-01   -2.36   0.0231 *  
## k                                6.10e-02   2.03e-02    3.00   0.0046 ** 
## sigma                                  NA         NA      NA       NA    
## lamda                            7.64e-01   1.86e-01    4.11   0.0002 ***
## c                                2.69e-02   1.14e-02    2.35   0.0238 *  
## omega                            8.14e+00   3.45e+00    2.36   0.0234 *  
## miu                             -2.87e-01   2.94e-01   -0.98   0.3354    
## cases                            1.56e-07   2.30e-07    0.68   0.5017    
## deaths                           2.61e-06   2.67e-06    0.98   0.3351    
## recovered                       -1.42e-06   9.02e-07   -1.57   0.1238    
## POP_LATITUDE                     2.06e-02   7.74e-03    2.66   0.0114 *  
## POP_LONGITUDE                   -5.75e-03   2.69e-03   -2.14   0.0389 *  
## PopulationEstimate2018           9.50e-06   4.56e-06    2.08   0.0439 *  
## PopTotalMale2017                -1.79e-05   8.77e-06   -2.04   0.0483 *  
## PopulationEstimate_above65_2017 -6.43e-06   2.77e-06   -2.32   0.0256 *  
## PopulationDensityperSqMile2010  -4.30e-05   1.86e-05   -2.31   0.0265 *  
## DiabetesPercentage               8.44e-02   3.10e-02    2.72   0.0097 ** 
## Smokers_Percentage              -3.85e-02   1.27e-02   -3.03   0.0043 ** 
## HeartDiseaseMortality            3.77e-03   1.33e-03    2.83   0.0072 ** 
## StrokeMortality                 -1.93e-02   6.55e-03   -2.94   0.0054 ** 
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                             2.00e-05   2.17e-04    0.09   0.9272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0192 on 39 degrees of freedom
## Multiple R-squared:  0.672,  Adjusted R-squared:  0.513 
## F-statistic: 4.21 on 19 and 39 DF,  p-value: 0.0000719
# Small additive model: alpha against days alone.
mod_alpha_1 = lm(
  alpha ~ days,
  data = data_trn
)
# Large additive model: all columns except sigma and the eight hospital and
# policy columns, i.e. the nine terms whose coefficients are NA in
# summary(mod_alpha_full).
mod_alpha_2 = lm(
  alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)

# Score the small and large additive models for alpha (one metrics row each).
# NOTE(review): perc_err is Inf in both rows -- check its computation in
# test_mod (defined elsewhere).
test_mod(mod_alpha_1, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.751e-02  4.329e-02  6.639e-02  1.278e-09  2.000e+00  2.533e-02        Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02896    0.51252    0.02816    0.05511   20.00000    0.02461        Inf
# Coefficient table for the days-only model.
summary(mod_alpha_1)
## 
## Call:
## lm(formula = alpha ~ days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.02535 -0.01478 -0.00667  0.00167  0.12315 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.029069   0.007897    3.68  0.00052 ***
## days        -0.000265   0.000139   -1.90  0.06200 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0269 on 57 degrees of freedom
## Multiple R-squared:  0.0598, Adjusted R-squared:  0.0433 
## F-statistic: 3.62 on 1 and 57 DF,  p-value: 0.062
# Coefficient table for the large additive model (no aliased columns left).
summary(mod_alpha_2)
## 
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03333 -0.00934  0.00033  0.00861  0.04447 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.47e+00   6.20e-01   -2.36   0.0231 *  
## k                                6.10e-02   2.03e-02    3.00   0.0046 ** 
## lamda                            7.64e-01   1.86e-01    4.11   0.0002 ***
## c                                2.69e-02   1.14e-02    2.35   0.0238 *  
## omega                            8.14e+00   3.45e+00    2.36   0.0234 *  
## miu                             -2.87e-01   2.94e-01   -0.98   0.3354    
## cases                            1.56e-07   2.30e-07    0.68   0.5017    
## deaths                           2.61e-06   2.67e-06    0.98   0.3351    
## recovered                       -1.42e-06   9.02e-07   -1.57   0.1238    
## POP_LATITUDE                     2.06e-02   7.74e-03    2.66   0.0114 *  
## POP_LONGITUDE                   -5.75e-03   2.69e-03   -2.14   0.0389 *  
## PopulationEstimate2018           9.50e-06   4.56e-06    2.08   0.0439 *  
## PopTotalMale2017                -1.79e-05   8.77e-06   -2.04   0.0483 *  
## PopulationEstimate_above65_2017 -6.43e-06   2.77e-06   -2.32   0.0256 *  
## PopulationDensityperSqMile2010  -4.30e-05   1.86e-05   -2.31   0.0265 *  
## DiabetesPercentage               8.44e-02   3.10e-02    2.72   0.0097 ** 
## Smokers_Percentage              -3.85e-02   1.27e-02   -3.03   0.0043 ** 
## HeartDiseaseMortality            3.77e-03   1.33e-03    2.83   0.0072 ** 
## StrokeMortality                 -1.93e-02   6.55e-03   -2.94   0.0054 ** 
## days                             2.00e-05   2.17e-04    0.09   0.9272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0192 on 39 degrees of freedom
## Multiple R-squared:  0.672,  Adjusted R-squared:  0.513 
## F-statistic: 4.21 on 19 and 39 DF,  p-value: 0.0000719
# Intermediate models for alpha.
# mod_alpha_3: drops the hospital, policy and sigma columns, the outbreak
# counts, omega, population density and the two population totals; uses
# log(days) instead of days; adds stay_at_home back and squared coordinates.
# The original formula had a doubled "+  +" before I(POP_LONGITUDE ^ 2), which
# R parsed as a unary plus; it is removed here. The fitted model is the same,
# only the printed Call changes from "+ +I(...)" to "+ I(...)".
mod_alpha_3 = lm(
  alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    # stay_at_home is removed above and re-added here, so it does enter the
    # model (as the term right after log(days))
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE ^ 2) + I(POP_LONGITUDE ^ 2),
  data = data_trn
)
# mod_alpha_4: same removals up to log(days), but keeps the population totals
# and adds neither stay_at_home nor the squared coordinates.
mod_alpha_4 = lm(
  alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

# Score the first four candidate models for alpha (one metrics row each).
# NOTE(review): perc_err is Inf in every row -- check its computation in
# test_mod (defined elsewhere).
test_mod(mod_alpha_1, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.751e-02  4.329e-02  6.639e-02  1.278e-09  2.000e+00  2.533e-02        Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02896    0.51252    0.02816    0.05511   20.00000    0.02461        Inf
test_mod(mod_alpha_3, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.025490   0.509506   0.008626   0.008274  16.000000   0.022995        Inf
test_mod(mod_alpha_4, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.025671   0.485239   0.007565   0.019305  15.000000   0.023262        Inf
# Coefficient table for the first intermediate model.
summary(mod_alpha_3)
## 
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03054 -0.01042  0.00047  0.00781  0.06194 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -9.77e+01   6.42e+01   -1.52   0.1354    
## k                                8.45e-02   1.86e-02    4.54  4.5e-05 ***
## lamda                            9.36e-01   1.77e-01    5.29  3.9e-06 ***
## c                                3.78e-02   1.07e-02    3.52   0.0010 ** 
## miu                             -5.21e-01   2.62e-01   -1.99   0.0530 .  
## POP_LATITUDE                     1.87e+00   1.18e+00    1.58   0.1210    
## POP_LONGITUDE                   -1.29e+00   8.74e-01   -1.47   0.1481    
## PopulationEstimate_above65_2017  1.34e-05   1.03e-05    1.31   0.1985    
## DiabetesPercentage               7.53e-01   4.76e-01    1.58   0.1210    
## Smokers_Percentage              -3.95e-01   2.55e-01   -1.55   0.1284    
## HeartDiseaseMortality            8.19e-03   4.33e-03    1.89   0.0653 .  
## StrokeMortality                 -4.47e-02   2.13e-02   -2.10   0.0417 *  
## log(days)                       -1.69e-02   4.29e-03   -3.93   0.0003 ***
## stay_at_home                    -9.37e-02   6.90e-02   -1.36   0.1817    
## I(POP_LATITUDE^2)               -2.34e-02   1.49e-02   -1.57   0.1238    
## I(POP_LONGITUDE^2)              -6.68e-03   4.56e-03   -1.47   0.1499    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0193 on 43 degrees of freedom
## Multiple R-squared:  0.636,  Adjusted R-squared:  0.51 
## F-statistic: 5.02 on 15 and 43 DF,  p-value: 0.0000154
# Coefficient table for the second intermediate model.
summary(mod_alpha_4)
## 
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03305 -0.01194  0.00035  0.00802  0.05910 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.93e+00   5.82e-01   -3.32  0.00180 ** 
## k                                8.67e-02   1.90e-02    4.56  4.1e-05 ***
## lamda                            9.95e-01   1.78e-01    5.59  1.3e-06 ***
## c                                4.35e-02   1.05e-02    4.14  0.00015 ***
## miu                             -6.93e-01   2.49e-01   -2.78  0.00794 ** 
## POP_LATITUDE                     2.47e-02   7.17e-03    3.44  0.00127 ** 
## POP_LONGITUDE                   -8.54e-03   2.49e-03   -3.43  0.00131 ** 
## PopulationEstimate2018           1.49e-05   4.30e-06    3.46  0.00121 ** 
## PopTotalMale2017                -2.85e-05   8.26e-06   -3.45  0.00125 ** 
## PopulationEstimate_above65_2017 -8.49e-06   2.51e-06   -3.39  0.00150 ** 
## DiabetesPercentage               1.01e-01   2.69e-02    3.77  0.00048 ***
## Smokers_Percentage              -3.85e-02   1.01e-02   -3.81  0.00043 ***
## HeartDiseaseMortality            3.11e-03   9.64e-04    3.22  0.00239 ** 
## StrokeMortality                 -1.83e-02   4.79e-03   -3.81  0.00042 ***
## log(days)                       -1.66e-02   4.39e-03   -3.78  0.00047 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0197 on 44 degrees of freedom
## Multiple R-squared:  0.609,  Adjusted R-squared:  0.485 
## F-statistic: 4.91 on 14 and 44 DF,  p-value: 0.0000239
# Two restricted reference models for alpha (both relatively bad).
# mod_alpha_5: the large additive model with the other parameter columns
# k, lamda, c, omega, miu removed as well.
mod_alpha_5 = lm(
  alpha ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + omega + miu),
  data = data_trn
)
# mod_alpha_6: the complement -- only those five parameter columns.
mod_alpha_6 = lm(
  alpha ~ k + lamda + c + omega + miu,
  data = data_trn
)

# Score all six candidate models for alpha side by side.
# NOTE(review): perc_err is Inf for every model -- check its computation in
# test_mod (defined elsewhere).
test_mod(mod_alpha_1, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.751e-02  4.329e-02  6.639e-02  1.278e-09  2.000e+00  2.533e-02        Inf
test_mod(mod_alpha_2, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.02896    0.51252    0.02816    0.05511   20.00000    0.02461        Inf
test_mod(mod_alpha_3, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.025490   0.509506   0.008626   0.008274  16.000000   0.022995        Inf
test_mod(mod_alpha_4, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.025671   0.485239   0.007565   0.019305  15.000000   0.023262        Inf
test_mod(mod_alpha_5, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  3.376e-02  1.156e-01  2.723e-02  1.127e-05  1.500e+01  2.189e-02        Inf
test_mod(mod_alpha_6, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
## 0.02497976 0.39740134 0.00033377 0.00004824 6.00000000 0.01597093        Inf
# Coefficient table for the model without the other parameter columns.
summary(mod_alpha_5)
## 
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (k + lamda + 
##     c + omega + miu), data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04609 -0.01070 -0.00096  0.00612  0.10069 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     -1.15e+00   6.13e-01   -1.88    0.067 .
## cases                           -3.10e-08   2.80e-07   -0.11    0.912  
## deaths                           2.89e-06   3.36e-06    0.86    0.394  
## recovered                       -7.14e-07   1.05e-06   -0.68    0.498  
## POP_LATITUDE                     1.68e-02   7.95e-03    2.11    0.041 *
## POP_LONGITUDE                   -4.95e-03   2.65e-03   -1.87    0.068 .
## PopulationEstimate2018           7.92e-06   4.28e-06    1.85    0.071 .
## PopTotalMale2017                -1.50e-05   8.21e-06   -1.83    0.074 .
## PopulationEstimate_above65_2017 -4.55e-06   2.91e-06   -1.56    0.126  
## PopulationDensityperSqMile2010  -4.13e-05   2.21e-05   -1.87    0.068 .
## DiabetesPercentage               7.06e-02   3.32e-02    2.13    0.039 *
## Smokers_Percentage              -3.17e-02   1.44e-02   -2.20    0.033 *
## HeartDiseaseMortality            3.14e-03   1.59e-03    1.97    0.056 .
## StrokeMortality                 -1.54e-02   7.65e-03   -2.02    0.050 *
## days                            -1.80e-04   2.33e-04   -0.77    0.443  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0259 on 44 degrees of freedom
## Multiple R-squared:  0.329,  Adjusted R-squared:  0.116 
## F-statistic: 1.54 on 14 and 44 DF,  p-value: 0.136
# Coefficient table for the model that uses only k, lamda, c, omega, miu.
summary(mod_alpha_6)
## 
## Call:
## lm(formula = alpha ~ k + lamda + c + omega + miu, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03314 -0.00985 -0.00122  0.00752  0.07859 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.05160    0.01893   -2.73  0.00866 ** 
## k            0.02827    0.01580    1.79  0.07935 .  
## lamda        0.51238    0.13399    3.82  0.00035 ***
## c            0.01970    0.00912    2.16  0.03520 *  
## omega       11.31149    2.51653    4.49 0.000038 ***
## miu         -0.31904    0.11971   -2.66  0.01018 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0213 on 53 degrees of freedom
## Multiple R-squared:  0.449,  Adjusted R-squared:  0.397 
## F-statistic: 8.65 on 5 and 53 DF,  p-value: 4.81e-06
# Keep mod_alpha_4 as the working model for alpha.
# NOTE(review): in the test_mod output mod_alpha_4 is not the best candidate on
# loocv_rmse or test_rmse (mod_alpha_6 is lowest on both) nor on adj_r2
# (mod_alpha_2 is highest). Every coefficient in its summary has p < 0.01,
# which may be the reason it was chosen -- confirm and state the criterion.
mod_alpha = mod_alpha_4
# diagnostics(mod_alpha, testit = FALSE)

Modeling omega

# full additive model
# Regress omega on every other column of data_trn. The fit is rank-deficient:
# summary() reports 9 coefficients as not defined because of singularities,
# and predict.lm warns accordingly when test_mod scores the model.
mod_omega_full = lm(omega ~ ., data = data_trn)
test_mod(mod_omega_full, k = 6)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0014028  0.5044044  0.0275331  0.3528000 29.0000000  0.0007156        Inf
# Coefficient table for the full model; the NA rows mark the aliased columns.
summary(mod_omega_full)
## 
## Call:
## lm(formula = omega ~ ., data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017815 -0.0003972 -0.0000795  0.0003670  0.0016757 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -3.58e-02   2.82e-02   -1.27  0.21271    
## k                                8.68e-04   9.69e-04    0.89  0.37630    
## sigma                                  NA         NA      NA       NA    
## lamda                            1.81e-03   9.66e-03    0.19  0.85212    
## c                                1.94e-04   5.30e-04    0.37  0.71646    
## alpha                            1.54e-02   6.51e-03    2.36  0.02342 *  
## miu                              1.17e-03   1.29e-02    0.09  0.92811    
## cases                           -5.02e-09   1.00e-08   -0.50  0.61947    
## deaths                          -1.17e-07   1.16e-07   -1.01  0.31698    
## recovered                        5.32e-08   3.95e-08    1.35  0.18586    
## POP_LATITUDE                     3.95e-04   3.60e-04    1.10  0.27871    
## POP_LONGITUDE                   -1.93e-04   1.20e-04   -1.61  0.11503    
## PopulationEstimate2018           2.40e-07   2.05e-07    1.17  0.24989    
## PopTotalMale2017                -4.57e-07   3.94e-07   -1.16  0.25348    
## PopulationEstimate_above65_2017 -1.61e-07   1.26e-07   -1.28  0.20858    
## PopulationDensityperSqMile2010   8.04e-07   8.54e-07    0.94  0.35190    
## DiabetesPercentage               1.99e-03   1.43e-03    1.39  0.17223    
## Smokers_Percentage              -6.98e-04   6.02e-04   -1.16  0.25337    
## HeartDiseaseMortality            5.72e-05   6.28e-05    0.91  0.36748    
## StrokeMortality                 -3.36e-04   3.10e-04   -1.09  0.28441    
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                            -3.03e-05   8.08e-06   -3.75  0.00058 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000834 on 39 degrees of freedom
## Multiple R-squared:  0.667,  Adjusted R-squared:  0.504 
## F-statistic: 4.11 on 19 and 39 DF,  p-value: 0.0000931
# Small additive model: omega against days alone.
mod_omega_1 = lm(
  omega ~ days,
  data = data_trn
)
# Large additive model: all columns except sigma and the eight hospital and
# policy columns, i.e. the nine terms whose coefficients are NA in
# summary(mod_omega_full).
mod_omega_2 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)

# Score the small and large additive models for omega (one metrics row each).
# NOTE(review): perc_err is Inf in both rows -- check its computation in
# test_mod (defined elsewhere).
test_mod(mod_omega_1, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0010009  0.3186830  0.0578837  0.0002535  2.0000000  0.0009460        Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0014028  0.5044044  0.0275331  0.3528000 20.0000000  0.0007156        Inf
# Coefficient table for the days-only model.
summary(mod_omega_1)
## 
## Call:
## lm(formula = omega ~ days, data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.001438 -0.000550 -0.000128  0.000312  0.003806 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.52e-03   2.87e-04    8.78  3.5e-12 ***
## days        -2.69e-05   5.07e-06   -5.30  1.9e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000977 on 57 degrees of freedom
## Multiple R-squared:  0.33,   Adjusted R-squared:  0.319 
## F-statistic: 28.1 on 1 and 57 DF,  p-value: 1.92e-06
# Coefficient table for the large additive model (no aliased columns left).
summary(mod_omega_2)
## 
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017815 -0.0003972 -0.0000795  0.0003670  0.0016757 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -3.58e-02   2.82e-02   -1.27  0.21271    
## k                                8.68e-04   9.69e-04    0.89  0.37630    
## lamda                            1.81e-03   9.66e-03    0.19  0.85212    
## c                                1.94e-04   5.30e-04    0.37  0.71646    
## alpha                            1.54e-02   6.51e-03    2.36  0.02342 *  
## miu                              1.17e-03   1.29e-02    0.09  0.92811    
## cases                           -5.02e-09   1.00e-08   -0.50  0.61947    
## deaths                          -1.17e-07   1.16e-07   -1.01  0.31698    
## recovered                        5.32e-08   3.95e-08    1.35  0.18586    
## POP_LATITUDE                     3.95e-04   3.60e-04    1.10  0.27871    
## POP_LONGITUDE                   -1.93e-04   1.20e-04   -1.61  0.11503    
## PopulationEstimate2018           2.40e-07   2.05e-07    1.17  0.24989    
## PopTotalMale2017                -4.57e-07   3.94e-07   -1.16  0.25348    
## PopulationEstimate_above65_2017 -1.61e-07   1.26e-07   -1.28  0.20858    
## PopulationDensityperSqMile2010   8.04e-07   8.54e-07    0.94  0.35190    
## DiabetesPercentage               1.99e-03   1.43e-03    1.39  0.17223    
## Smokers_Percentage              -6.98e-04   6.02e-04   -1.16  0.25337    
## HeartDiseaseMortality            5.72e-05   6.28e-05    0.91  0.36748    
## StrokeMortality                 -3.36e-04   3.10e-04   -1.09  0.28441    
## days                            -3.03e-05   8.08e-06   -3.75  0.00058 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000834 on 39 degrees of freedom
## Multiple R-squared:  0.667,  Adjusted R-squared:  0.504 
## F-statistic: 4.11 on 19 and 39 DF,  p-value: 0.0000931
# Intermediate models for omega.
# The original formulas carried a "- omega" term copied from the c and alpha
# models. omega is the response here and "." never includes the response, so
# that term removed nothing; it is dropped to avoid suggesting a predictor is
# excluded. The fitted models are unchanged (only the printed Call differs).
# Consequence worth knowing: k, lamda, c, alpha and miu all remain predictors,
# so these models have one more parameter than their c/alpha counterparts.
# NOTE(review): if another parameter column was meant to be excluded in place
# of omega, subtract it explicitly.
#
# mod_omega_3: drops the hospital, policy and sigma columns, the outbreak
# counts, population density and the two population totals; uses log(days)
# instead of days; adds stay_at_home back and squared coordinates. The stray
# doubled "+  +" before I(POP_LONGITUDE ^ 2) (a harmless unary plus) is
# removed as well.
mod_omega_3 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    # stay_at_home is removed above and re-added here, so it does enter the
    # model (as the term right after log(days))
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    + I(POP_LATITUDE ^ 2) + I(POP_LONGITUDE ^ 2),
  data = data_trn
)
# mod_omega_4: same removals up to log(days), but keeps the population totals
# and adds neither stay_at_home nor the squared coordinates.
mod_omega_4 = lm(
  omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

test_mod(mod_omega_1, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0010009  0.3186830  0.0578837  0.0002535  2.0000000  0.0009460        Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0014028  0.5044044  0.0275331  0.3528000 20.0000000  0.0007156        Inf
test_mod(mod_omega_3, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.001163   0.604801   0.030730   0.350808  17.000000   0.000540        Inf
test_mod(mod_omega_4, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0011062  0.6139613  0.0219400  0.3405360 16.0000000  0.0005402        Inf
summary(mod_omega_3)
## 
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016894 -0.0004084 -0.0000319  0.0003170  0.0016192 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -3.49e+00   2.55e+00   -1.37     0.18    
## k                                1.31e-03   8.75e-04    1.49     0.14    
## lamda                            8.57e-03   8.79e-03    0.98     0.33    
## c                                4.17e-04   4.71e-04    0.89     0.38    
## alpha                            7.54e-03   5.90e-03    1.28     0.21    
## miu                              5.88e-03   1.06e-02    0.56     0.58    
## POP_LATITUDE                     6.49e-02   4.70e-02    1.38     0.17    
## POP_LONGITUDE                   -4.70e-02   3.46e-02   -1.36     0.18    
## PopulationEstimate_above65_2017  5.07e-07   4.04e-07    1.25     0.22    
## DiabetesPercentage               2.69e-02   1.89e-02    1.42     0.16    
## Smokers_Percentage              -1.43e-02   1.01e-02   -1.41     0.17    
## HeartDiseaseMortality            2.89e-04   1.74e-04    1.66     0.10    
## StrokeMortality                 -1.45e-03   8.64e-04   -1.68     0.10    
## log(days)                       -1.26e-03   1.94e-04   -6.51  7.2e-08 ***
## stay_at_home                    -3.61e-03   2.73e-03   -1.32     0.19    
## I(POP_LATITUDE^2)               -8.15e-04   5.92e-04   -1.38     0.18    
## I(POP_LONGITUDE^2)              -2.45e-04   1.81e-04   -1.35     0.18    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000744 on 42 degrees of freedom
## Multiple R-squared:  0.714,  Adjusted R-squared:  0.605 
## F-statistic: 6.55 on 16 and 42 DF,  p-value: 4.96e-07
summary(mod_omega_4)
## 
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016885 -0.0004072 -0.0000307  0.0003192  0.0016125 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -4.34e-02   2.43e-02   -1.78    0.081 .  
## k                                1.31e-03   8.61e-04    1.52    0.136    
## lamda                            8.58e-03   8.68e-03    0.99    0.328    
## c                                4.14e-04   4.62e-04    0.90    0.375    
## alpha                            7.45e-03   5.62e-03    1.32    0.192    
## miu                              6.04e-03   1.01e-02    0.60    0.552    
## POP_LATITUDE                     5.60e-04   3.01e-04    1.86    0.070 .  
## POP_LONGITUDE                   -2.34e-04   1.04e-04   -2.24    0.030 *  
## PopulationEstimate2018           3.04e-07   1.81e-07    1.68    0.101    
## PopTotalMale2017                -5.80e-07   3.47e-07   -1.67    0.102    
## PopulationEstimate_above65_2017 -1.94e-07   1.05e-07   -1.85    0.071 .  
## DiabetesPercentage               2.61e-03   1.15e-03    2.26    0.029 *  
## Smokers_Percentage              -1.08e-03   4.35e-04   -2.49    0.017 *  
## HeartDiseaseMortality            9.78e-05   4.00e-05    2.44    0.019 *  
## StrokeMortality                 -5.00e-04   2.06e-04   -2.43    0.019 *  
## log(days)                       -1.26e-03   1.89e-04   -6.69  3.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000736 on 43 degrees of freedom
## Multiple R-squared:  0.714,  Adjusted R-squared:  0.614 
## F-statistic: 7.15 on 15 and 43 DF,  p-value: 1.79e-07
# Two complementary comparison models for omega (labelled "relatively bad"
# in the original notes):
#   mod_omega_5 - the large additive model with the five parameter columns
#                 k, lamda, c, alpha and miu taken out
#   mod_omega_6 - those five parameter columns as the only predictors
mod_omega_5 = lm(
  formula = omega ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + alpha + miu),
  data = data_trn
)
mod_omega_6 = lm(
  formula = omega ~ k + lamda + c + alpha + miu,
  data = data_trn
)

test_mod(mod_omega_1, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0010009  0.3186830  0.0578837  0.0002535  2.0000000  0.0009460        Inf
test_mod(mod_omega_2, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0014028  0.5044044  0.0275331  0.3528000 20.0000000  0.0007156        Inf
test_mod(mod_omega_3, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.001163   0.604801   0.030730   0.350808  17.000000   0.000540        Inf
test_mod(mod_omega_4, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0011062  0.6139613  0.0219400  0.3405360 16.0000000  0.0005402        Inf
test_mod(mod_omega_5, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0011632  0.4042799  0.0001611  0.0260695 15.0000000  0.0007283        Inf
test_mod(mod_omega_6, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
## 0.00150272 0.29957618 0.18905170 0.00009094 6.00000000 0.00085551        Inf
summary(mod_omega_5)
## 
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (k + lamda + 
##     c + alpha + miu), data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0023595 -0.0003765  0.0000225  0.0003963  0.0029578 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -4.88e-02   2.17e-02   -2.25  0.02941 *  
## cases                           -7.38e-09   9.90e-09   -0.74  0.46038    
## deaths                          -4.70e-08   1.19e-07   -0.39  0.69477    
## recovered                        4.38e-08   3.70e-08    1.19  0.24185    
## POP_LATITUDE                     6.03e-04   2.81e-04    2.14  0.03756 *  
## POP_LONGITUDE                   -2.47e-04   9.36e-05   -2.64  0.01135 *  
## PopulationEstimate2018           3.28e-07   1.51e-07    2.17  0.03570 *  
## PopTotalMale2017                -6.24e-07   2.90e-07   -2.15  0.03699 *  
## PopulationEstimate_above65_2017 -2.05e-07   1.03e-07   -1.99  0.05320 .  
## PopulationDensityperSqMile2010   1.35e-07   7.81e-07    0.17  0.86377    
## DiabetesPercentage               2.79e-03   1.17e-03    2.38  0.02158 *  
## Smokers_Percentage              -1.06e-03   5.09e-04   -2.09  0.04219 *  
## HeartDiseaseMortality            9.58e-05   5.64e-05    1.70  0.09642 .  
## StrokeMortality                 -5.11e-04   2.71e-04   -1.89  0.06559 .  
## days                            -3.11e-05   8.24e-06   -3.78  0.00047 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000914 on 44 degrees of freedom
## Multiple R-squared:  0.548,  Adjusted R-squared:  0.404 
## F-statistic: 3.81 on 14 and 44 DF,  p-value: 0.000331
summary(mod_omega_6)
## 
## Call:
## lm(formula = omega ~ k + lamda + c + alpha + miu, data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.001569 -0.000517 -0.000270  0.000534  0.002377 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.001837   0.000904    2.03    0.047 *  
## k           -0.001094   0.000741   -1.48    0.146    
## lamda       -0.005744   0.006985   -0.82    0.415    
## c           -0.000374   0.000439   -0.85    0.398    
## alpha        0.024399   0.005428    4.49 0.000038 ***
## miu          0.010868   0.005730    1.90    0.063 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000991 on 53 degrees of freedom
## Multiple R-squared:  0.36,   Adjusted R-squared:   0.3 
## F-statistic: 5.96 on 5 and 53 DF,  p-value: 0.000191
# Final omega model: mod_omega_4. Among mod_omega_1..6 in the test_mod output
# above it has the highest adj_r2 (0.614), and its test_rmse (0.00054) is on
# par with mod_omega_3 for the lowest.
# NOTE(review): bp_pval.BP is 0.022 for this model; if that is a
# Breusch-Pagan p-value, constant variance is doubtful - confirm with the
# diagnostics call below.
mod_omega = mod_omega_4
# diagnostics(mod_omega, testit = FALSE)

Modeling miu

# Full additive model for miu: every other column of data_trn is a predictor.
# The fit is rank-deficient (nine coefficients come back NA in the summary),
# which is what triggers the predict.lm warnings during test_mod().
mod_miu_full = lm(formula = miu ~ ., data = data_trn)
test_mod(mod_miu_full, k = 7)
## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading

## Warning in predict.lm(model, newdata = data_tst): prediction from a rank-
## deficient fit may be misleading
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.017161   0.894881   0.352920   0.928797  29.000000   0.009206  41.837402
summary(mod_miu_full)
## 
## Call:
## lm(formula = miu ~ ., data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.018275 -0.005736 -0.000703  0.005370  0.019586 
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.22e+00   2.99e-01   -4.08  0.00022 ***
## k                                2.53e-02   1.14e-02    2.21  0.03272 *  
## sigma                                  NA         NA      NA       NA    
## lamda                            4.32e-01   9.79e-02    4.41  7.8e-05 ***
## c                                2.56e-02   5.15e-03    4.96  1.4e-05 ***
## alpha                           -8.31e-02   8.52e-02   -0.98  0.33538    
## omega                            1.80e-01   1.98e+00    0.09  0.92811    
## cases                           -1.75e-07   1.22e-07   -1.44  0.15723    
## deaths                          -1.08e-06   1.44e-06   -0.75  0.45731    
## recovered                        1.19e-06   4.63e-07    2.57  0.01415 *  
## POP_LATITUDE                     1.44e-02   3.89e-03    3.71  0.00065 ***
## POP_LONGITUDE                   -4.99e-03   1.31e-03   -3.82  0.00046 ***
## PopulationEstimate2018           1.01e-05   2.02e-06    5.03  1.1e-05 ***
## PopTotalMale2017                -1.96e-05   3.85e-06   -5.09  9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06   1.40e-06   -3.38  0.00165 ** 
## PopulationDensityperSqMile2010   2.30e-05   1.00e-05    2.29  0.02739 *  
## DiabetesPercentage               4.43e-02   1.68e-02    2.64  0.01188 *  
## Smokers_Percentage              -1.10e-02   7.39e-03   -1.49  0.14522    
## HeartDiseaseMortality            6.60e-04   7.79e-04    0.85  0.40224    
## StrokeMortality                 -4.27e-03   3.84e-03   -1.11  0.27299    
## Hospitals                              NA         NA      NA       NA    
## ICU_beds                               NA         NA      NA       NA    
## HospParticipatinginNetwork2017         NA         NA      NA       NA    
## stay_at_home                           NA         NA      NA       NA    
## above_50_gatherings                    NA         NA      NA       NA    
## above_500_gatherings                   NA         NA      NA       NA    
## restaurant_dine_in                     NA         NA      NA       NA    
## entertainment_gym                      NA         NA      NA       NA    
## days                            -1.30e-04   1.15e-04   -1.13  0.26392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared:  0.929,  Adjusted R-squared:  0.895 
## F-statistic:   27 on 19 and 39 DF,  p-value: <2e-16
# Small additive model: miu regressed on days alone.
mod_miu_1 = lm(formula = miu ~ days, data = data_trn)

# Large additive model: every column except the nine whose coefficients the
# full additive fit reports as NA (not defined because of singularities).
mod_miu_2 = lm(
  formula = miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma,
  data = data_trn
)

test_mod(mod_miu_1, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.032721  -0.015921   0.032293   0.000945   2.000000   0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.017161   0.894881   0.352920   0.928797  20.000000   0.009206  41.837402
summary(mod_miu_1)
## 
## Call:
## lm(formula = miu ~ days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04092 -0.02621 -0.00039  0.01751  0.10555 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0363221  0.0094359    3.85   0.0003 ***
## days        0.0000503  0.0001666    0.30   0.7639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0321 on 57 degrees of freedom
## Multiple R-squared:  0.0016, Adjusted R-squared:  -0.0159 
## F-statistic: 0.0911 on 1 and 57 DF,  p-value: 0.764
summary(mod_miu_2)
## 
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.018275 -0.005736 -0.000703  0.005370  0.019586 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.22e+00   2.99e-01   -4.08  0.00022 ***
## k                                2.53e-02   1.14e-02    2.21  0.03272 *  
## lamda                            4.32e-01   9.79e-02    4.41  7.8e-05 ***
## c                                2.56e-02   5.15e-03    4.96  1.4e-05 ***
## alpha                           -8.31e-02   8.52e-02   -0.98  0.33538    
## omega                            1.80e-01   1.98e+00    0.09  0.92811    
## cases                           -1.75e-07   1.22e-07   -1.44  0.15723    
## deaths                          -1.08e-06   1.44e-06   -0.75  0.45731    
## recovered                        1.19e-06   4.63e-07    2.57  0.01415 *  
## POP_LATITUDE                     1.44e-02   3.89e-03    3.71  0.00065 ***
## POP_LONGITUDE                   -4.99e-03   1.31e-03   -3.82  0.00046 ***
## PopulationEstimate2018           1.01e-05   2.02e-06    5.03  1.1e-05 ***
## PopTotalMale2017                -1.96e-05   3.85e-06   -5.09  9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06   1.40e-06   -3.38  0.00165 ** 
## PopulationDensityperSqMile2010   2.30e-05   1.00e-05    2.29  0.02739 *  
## DiabetesPercentage               4.43e-02   1.68e-02    2.64  0.01188 *  
## Smokers_Percentage              -1.10e-02   7.39e-03   -1.49  0.14522    
## HeartDiseaseMortality            6.60e-04   7.79e-04    0.85  0.40224    
## StrokeMortality                 -4.27e-03   3.84e-03   -1.11  0.27299    
## days                            -1.30e-04   1.15e-04   -1.13  0.26392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared:  0.929,  Adjusted R-squared:  0.895 
## F-statistic:   27 on 19 and 39 DF,  p-value: <2e-16
# Intermediate miu models. Both start from every column of data_trn (`.`),
# remove the hospital, gathering/closure and sigma columns together with
# cases, deaths, omega, recovered and population density, and use log(days)
# in place of days.
#
# mod_miu_3 additionally puts stay_at_home back in, drops the two population
# totals and adds squared latitude / longitude terms.
mod_miu_3 = lm(
  formula = miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days
    # stay_at_home was removed above; adding it here brings it back
    + stay_at_home - PopulationEstimate2018 - PopTotalMale2017
    # the second `+` is a unary plus and does not alter the fit
    + I(POP_LATITUDE ^ 2) + + I(POP_LONGITUDE ^ 2),
  data = data_trn
)

# mod_miu_4 stops after the log(days) swap, so stay_at_home stays out and
# both population totals stay in.
mod_miu_4 = lm(
  formula = miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - cases - deaths - omega - recovered - PopulationDensityperSqMile2010
    + log(days) - days,
  data = data_trn
)

test_mod(mod_miu_1, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.032721  -0.015921   0.032293   0.000945   2.000000   0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.017161   0.894881   0.352920   0.928797  20.000000   0.009206  41.837402
test_mod(mod_miu_3, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01807    0.88667    0.20086    0.93226   16.00000    0.01229   47.47454
test_mod(mod_miu_4, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01703    0.88089    0.19835    0.83854   15.00000    0.01365   69.23757
summary(mod_miu_3)
## 
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.020163 -0.005322 -0.000809  0.005689  0.023672 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.56e+02   2.80e+01   -5.58  1.5e-06 ***
## k                                3.14e-02   1.17e-02    2.69    0.010 *  
## lamda                            5.46e-01   9.54e-02    5.73  9.2e-07 ***
## c                                3.01e-02   5.01e-03    6.01  3.5e-07 ***
## alpha                           -1.62e-01   8.13e-02   -1.99    0.053 .  
## POP_LATITUDE                     2.87e+00   5.16e-01    5.56  1.6e-06 ***
## POP_LONGITUDE                   -2.12e+00   3.81e-01   -5.56  1.6e-06 ***
## PopulationEstimate_above65_2017  2.44e-05   4.49e-06    5.44  2.4e-06 ***
## DiabetesPercentage               1.16e+00   2.08e-01    5.60  1.4e-06 ***
## Smokers_Percentage              -6.18e-01   1.11e-01   -5.56  1.6e-06 ***
## HeartDiseaseMortality            1.07e-02   1.91e-03    5.59  1.4e-06 ***
## StrokeMortality                 -5.26e-02   9.53e-03   -5.52  1.8e-06 ***
## log(days)                       -2.21e-03   2.77e-03   -0.80    0.429    
## stay_at_home                    -1.65e-01   3.02e-02   -5.45  2.3e-06 ***
## I(POP_LATITUDE^2)               -3.61e-02   6.51e-03   -5.55  1.7e-06 ***
## I(POP_LONGITUDE^2)              -1.10e-02   1.99e-03   -5.56  1.6e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0107 on 43 degrees of freedom
## Multiple R-squared:  0.916,  Adjusted R-squared:  0.887 
## F-statistic: 31.3 on 15 and 43 DF,  p-value: <2e-16
summary(mod_miu_4)
## 
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.02263 -0.00679 -0.00071  0.00571  0.02444 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.77e+00   2.47e-01   -7.17  6.5e-09 ***
## k                                3.60e-02   1.17e-02    3.08   0.0035 ** 
## lamda                            5.93e-01   9.41e-02    6.30  1.2e-07 ***
## c                                3.07e-02   5.12e-03    6.00  3.3e-07 ***
## alpha                           -2.16e-01   7.75e-02   -2.78   0.0079 ** 
## POP_LATITUDE                     2.26e-02   2.95e-03    7.67  1.2e-09 ***
## POP_LONGITUDE                   -7.10e-03   1.14e-03   -6.24  1.5e-07 ***
## PopulationEstimate2018           1.34e-05   1.81e-06    7.41  2.9e-09 ***
## PopTotalMale2017                -2.58e-05   3.45e-06   -7.47  2.3e-09 ***
## PopulationEstimate_above65_2017 -6.78e-06   1.19e-06   -5.69  9.6e-07 ***
## DiabetesPercentage               7.36e-02   1.32e-02    5.56  1.5e-06 ***
## Smokers_Percentage              -2.62e-02   5.16e-03   -5.08  7.3e-06 ***
## HeartDiseaseMortality            2.37e-03   4.79e-04    4.96  1.1e-05 ***
## StrokeMortality                 -1.15e-02   2.55e-03   -4.51  4.8e-05 ***
## log(days)                       -3.23e-03   2.78e-03   -1.16   0.2514    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.011 on 44 degrees of freedom
## Multiple R-squared:  0.91,   Adjusted R-squared:  0.881 
## F-statistic: 31.6 on 14 and 44 DF,  p-value: <2e-16
# Two complementary comparison models for miu (labelled "relatively bad" in
# the original notes):
#   mod_miu_5 - the large additive model with the five parameter columns
#               k, lamda, c, alpha and omega taken out
#   mod_miu_6 - those five parameter columns as the only predictors
mod_miu_5 = lm(
  formula = miu ~ .
    - Hospitals - ICU_beds - HospParticipatinginNetwork2017
    - stay_at_home - above_50_gatherings - above_500_gatherings
    - restaurant_dine_in - entertainment_gym - sigma
    - (k + lamda + c + alpha + omega),
  data = data_trn
)
mod_miu_6 = lm(
  formula = miu ~ k + lamda + c + alpha + omega,
  data = data_trn
)

test_mod(mod_miu_1, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.032721  -0.015921   0.032293   0.000945   2.000000   0.025313 149.431195
test_mod(mod_miu_2, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.017161   0.894881   0.352920   0.928797  20.000000   0.009206  41.837402
test_mod(mod_miu_3, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01807    0.88667    0.20086    0.93226   16.00000    0.01229   47.47454
test_mod(mod_miu_4, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01703    0.88089    0.19835    0.83854   15.00000    0.01365   69.23757
test_mod(mod_miu_5, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.016233   0.832344   0.263518   0.454574  15.000000   0.008545  38.753492
test_mod(mod_miu_6, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0272487  0.4796789  0.5420675  0.0002727  6.0000000  0.0187462 86.0715224
summary(mod_miu_5)
## 
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - (k + lamda + 
##     c + alpha + omega), data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03007 -0.00653 -0.00129  0.00774  0.02857 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.07e+00   3.09e-01   -3.46  0.00122 ** 
## cases                           -4.11e-07   1.41e-07   -2.91  0.00571 ** 
## deaths                           2.74e-08   1.70e-06    0.02  0.98721    
## recovered                        1.90e-06   5.28e-07    3.59  0.00082 ***
## POP_LATITUDE                     1.26e-02   4.01e-03    3.15  0.00294 ** 
## POP_LONGITUDE                   -4.47e-03   1.34e-03   -3.34  0.00170 ** 
## PopulationEstimate2018           9.10e-06   2.16e-06    4.21  0.00012 ***
## PopTotalMale2017                -1.78e-05   4.14e-06   -4.28 0.000098 ***
## PopulationEstimate_above65_2017 -3.48e-06   1.47e-06   -2.36  0.02253 *  
## PopulationDensityperSqMile2010   2.61e-05   1.12e-05    2.34  0.02369 *  
## DiabetesPercentage               3.37e-02   1.68e-02    2.01  0.05055 .  
## Smokers_Percentage              -6.10e-03   7.26e-03   -0.84  0.40531    
## HeartDiseaseMortality            2.01e-04   8.05e-04    0.25  0.80393    
## StrokeMortality                 -1.14e-03   3.86e-03   -0.29  0.77024    
## days                            -8.34e-05   1.18e-04   -0.71  0.48179    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0131 on 44 degrees of freedom
## Multiple R-squared:  0.873,  Adjusted R-squared:  0.832 
## F-statistic: 21.6 on 14 and 44 DF,  p-value: 3.44e-15
summary(mod_miu_6)
## 
## Call:
## lm(formula = miu ~ k + lamda + c + alpha + omega, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03282 -0.01332 -0.00260  0.00867  0.07146 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.04172    0.02101   -1.99    0.052 .  
## k            0.01310    0.01744    0.75    0.456    
## lamda        0.78055    0.12288    6.35  5.0e-08 ***
## c            0.04364    0.00831    5.25  2.7e-06 ***
## alpha       -0.37039    0.13898   -2.66    0.010 *  
## omega        5.84918    3.08374    1.90    0.063 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.023 on 53 degrees of freedom
## Multiple R-squared:  0.525,  Adjusted R-squared:  0.48 
## F-statistic: 11.7 on 5 and 53 DF,  p-value: 1.22e-07
# Final miu model: mod_miu_2, which has the highest adj_r2 (0.895) of
# mod_miu_1..6 in the test_mod output above.
# NOTE(review): mod_miu_5 beats it on loocv_rmse (0.0162 vs 0.0172),
# test_rmse (0.0085 vs 0.0092) and perc_err (38.8 vs 41.8) - confirm that
# adj_r2 is the intended selection criterion.
mod_miu = mod_miu_2
# diagnostics(mod_miu, testit = FALSE)

Result

Six Models & Summaries

summary(mod_k)
## 
## Call:
## lm(formula = k ~ lamda + c + alpha + omega + miu, data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0289 -0.0445  0.0356  0.0886  0.2567 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.1002     0.0791   13.90   <2e-16 ***
## lamda        -2.7175     1.2218   -2.22    0.030 *  
## c            -0.1608     0.0772   -2.08    0.042 *  
## alpha         2.0142     1.1260    1.79    0.079 .  
## omega       -36.1277    24.4670   -1.48    0.146    
## miu           0.8043     1.0704    0.75    0.456    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.18 on 53 degrees of freedom
## Multiple R-squared:  0.162,  Adjusted R-squared:  0.0833 
## F-statistic: 2.05 on 5 and 53 DF,  p-value: 0.0859
summary(mod_lamda)
## 
## Call:
## lm(formula = lamda ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03373 -0.00652 -0.00214  0.00673  0.02291 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.63e+00   3.43e-01    4.74  2.3e-05 ***
## k                               -6.72e-02   1.10e-02   -6.11  2.3e-07 ***
## c                               -4.42e-02   4.46e-03   -9.92  8.6e-13 ***
## alpha                            4.18e-01   7.46e-02    5.59  1.3e-06 ***
## miu                              8.00e-01   1.27e-01    6.30  1.2e-07 ***
## POP_LATITUDE                    -2.07e-02   4.19e-03   -4.95  1.1e-05 ***
## POP_LONGITUDE                    6.72e-03   1.50e-03    4.46  5.5e-05 ***
## PopulationEstimate2018          -1.16e-05   2.61e-06   -4.46  5.6e-05 ***
## PopTotalMale2017                 2.21e-05   5.02e-06    4.41  6.6e-05 ***
## PopulationEstimate_above65_2017  7.13e-06   1.47e-06    4.84  1.6e-05 ***
## DiabetesPercentage              -7.93e-02   1.61e-02   -4.93  1.2e-05 ***
## Smokers_Percentage               2.99e-02   6.06e-03    4.93  1.2e-05 ***
## HeartDiseaseMortality           -2.60e-03   5.73e-04   -4.53  4.5e-05 ***
## StrokeMortality                  1.45e-02   2.83e-03    5.13  6.3e-06 ***
## log(days)                        7.73e-03   3.06e-03    2.53    0.015 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0128 on 44 degrees of freedom
## Multiple R-squared:  0.87,   Adjusted R-squared:  0.828 
## F-statistic:   21 on 14 and 44 DF,  p-value: 5.59e-15
summary(mod_c)
## 
## Call:
## lm(formula = c ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days + stay_at_home - PopulationEstimate2018 - 
##     PopTotalMale2017 + I(POP_LATITUDE^2) + +I(POP_LONGITUDE^2), 
##     data = data_trn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5448 -0.1371 -0.0494  0.1446  0.4962 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.85e+03   7.01e+02    4.07  0.00020 ***
## k                               -1.08e+00   2.31e-01   -4.67  2.9e-05 ***
## lamda                           -1.53e+01   1.62e+00   -9.43  4.9e-12 ***
## alpha                            5.92e+00   1.68e+00    3.52  0.00103 ** 
## miu                              1.52e+01   2.52e+00    6.01  3.5e-07 ***
## POP_LATITUDE                    -5.29e+01   1.29e+01   -4.10  0.00018 ***
## POP_LONGITUDE                    3.86e+01   9.54e+00    4.04  0.00021 ***
## PopulationEstimate_above65_2017 -4.41e-04   1.12e-04   -3.93  0.00030 ***
## DiabetesPercentage              -2.13e+01   5.20e+00   -4.10  0.00018 ***
## Smokers_Percentage               1.14e+01   2.78e+00    4.09  0.00019 ***
## HeartDiseaseMortality           -2.00e-01   4.74e-02   -4.22  0.00013 ***
## StrokeMortality                  1.01e+00   2.33e-01    4.33  8.7e-05 ***
## log(days)                        1.40e-01   5.88e-02    2.39  0.02155 *  
## stay_at_home                     3.00e+00   7.54e-01    3.97  0.00027 ***
## I(POP_LATITUDE^2)                6.65e-01   1.62e-01    4.09  0.00018 ***
## I(POP_LONGITUDE^2)               2.01e-01   4.97e-02    4.04  0.00022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.241 on 43 degrees of freedom
## Multiple R-squared:  0.748,  Adjusted R-squared:  0.661 
## F-statistic: 8.52 on 15 and 43 DF,  p-value: 1.51e-08
summary(mod_alpha)
## 
## Call:
## lm(formula = alpha ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03305 -0.01194  0.00035  0.00802  0.05910 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.93e+00   5.82e-01   -3.32  0.00180 ** 
## k                                8.67e-02   1.90e-02    4.56  4.1e-05 ***
## lamda                            9.95e-01   1.78e-01    5.59  1.3e-06 ***
## c                                4.35e-02   1.05e-02    4.14  0.00015 ***
## miu                             -6.93e-01   2.49e-01   -2.78  0.00794 ** 
## POP_LATITUDE                     2.47e-02   7.17e-03    3.44  0.00127 ** 
## POP_LONGITUDE                   -8.54e-03   2.49e-03   -3.43  0.00131 ** 
## PopulationEstimate2018           1.49e-05   4.30e-06    3.46  0.00121 ** 
## PopTotalMale2017                -2.85e-05   8.26e-06   -3.45  0.00125 ** 
## PopulationEstimate_above65_2017 -8.49e-06   2.51e-06   -3.39  0.00150 ** 
## DiabetesPercentage               1.01e-01   2.69e-02    3.77  0.00048 ***
## Smokers_Percentage              -3.85e-02   1.01e-02   -3.81  0.00043 ***
## HeartDiseaseMortality            3.11e-03   9.64e-04    3.22  0.00239 ** 
## StrokeMortality                 -1.83e-02   4.79e-03   -3.81  0.00042 ***
## log(days)                       -1.66e-02   4.39e-03   -3.78  0.00047 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0197 on 44 degrees of freedom
## Multiple R-squared:  0.609,  Adjusted R-squared:  0.485 
## F-statistic: 4.91 on 14 and 44 DF,  p-value: 0.0000239
summary(mod_omega)
## 
## Call:
## lm(formula = omega ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma - cases - 
##     deaths - omega - recovered - PopulationDensityperSqMile2010 + 
##     log(days) - days, data = data_trn)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016885 -0.0004072 -0.0000307  0.0003192  0.0016125 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -4.34e-02   2.43e-02   -1.78    0.081 .  
## k                                1.31e-03   8.61e-04    1.52    0.136    
## lamda                            8.58e-03   8.68e-03    0.99    0.328    
## c                                4.14e-04   4.62e-04    0.90    0.375    
## alpha                            7.45e-03   5.62e-03    1.32    0.192    
## miu                              6.04e-03   1.01e-02    0.60    0.552    
## POP_LATITUDE                     5.60e-04   3.01e-04    1.86    0.070 .  
## POP_LONGITUDE                   -2.34e-04   1.04e-04   -2.24    0.030 *  
## PopulationEstimate2018           3.04e-07   1.81e-07    1.68    0.101    
## PopTotalMale2017                -5.80e-07   3.47e-07   -1.67    0.102    
## PopulationEstimate_above65_2017 -1.94e-07   1.05e-07   -1.85    0.071 .  
## DiabetesPercentage               2.61e-03   1.15e-03    2.26    0.029 *  
## Smokers_Percentage              -1.08e-03   4.35e-04   -2.49    0.017 *  
## HeartDiseaseMortality            9.78e-05   4.00e-05    2.44    0.019 *  
## StrokeMortality                 -5.00e-04   2.06e-04   -2.43    0.019 *  
## log(days)                       -1.26e-03   1.89e-04   -6.69  3.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000736 on 43 degrees of freedom
## Multiple R-squared:  0.714,  Adjusted R-squared:  0.614 
## F-statistic: 7.15 on 15 and 43 DF,  p-value: 1.79e-07
summary(mod_miu)
## 
## Call:
## lm(formula = miu ~ . - Hospitals - ICU_beds - HospParticipatinginNetwork2017 - 
##     stay_at_home - above_50_gatherings - above_500_gatherings - 
##     restaurant_dine_in - entertainment_gym - sigma, data = data_trn)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.018275 -0.005736 -0.000703  0.005370  0.019586 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -1.22e+00   2.99e-01   -4.08  0.00022 ***
## k                                2.53e-02   1.14e-02    2.21  0.03272 *  
## lamda                            4.32e-01   9.79e-02    4.41  7.8e-05 ***
## c                                2.56e-02   5.15e-03    4.96  1.4e-05 ***
## alpha                           -8.31e-02   8.52e-02   -0.98  0.33538    
## omega                            1.80e-01   1.98e+00    0.09  0.92811    
## cases                           -1.75e-07   1.22e-07   -1.44  0.15723    
## deaths                          -1.08e-06   1.44e-06   -0.75  0.45731    
## recovered                        1.19e-06   4.63e-07    2.57  0.01415 *  
## POP_LATITUDE                     1.44e-02   3.89e-03    3.71  0.00065 ***
## POP_LONGITUDE                   -4.99e-03   1.31e-03   -3.82  0.00046 ***
## PopulationEstimate2018           1.01e-05   2.02e-06    5.03  1.1e-05 ***
## PopTotalMale2017                -1.96e-05   3.85e-06   -5.09  9.5e-06 ***
## PopulationEstimate_above65_2017 -4.73e-06   1.40e-06   -3.38  0.00165 ** 
## PopulationDensityperSqMile2010   2.30e-05   1.00e-05    2.29  0.02739 *  
## DiabetesPercentage               4.43e-02   1.68e-02    2.64  0.01188 *  
## Smokers_Percentage              -1.10e-02   7.39e-03   -1.49  0.14522    
## HeartDiseaseMortality            6.60e-04   7.79e-04    0.85  0.40224    
## StrokeMortality                 -4.27e-03   3.84e-03   -1.11  0.27299    
## days                            -1.30e-04   1.15e-04   -1.13  0.26392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0103 on 39 degrees of freedom
## Multiple R-squared:  0.929,  Adjusted R-squared:  0.895 
## F-statistic:   27 on 19 and 39 DF,  p-value: <2e-16

Model Diagnostics (Test Statistics)

test_mod(mod_k, k = 1)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  2.135e-01  8.328e-02  1.932e-02  7.076e-10  6.000e+00  5.237e-02  5.250e+00
test_mod(mod_lamda, k = 3)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##    0.01745    0.82845    0.19372    0.01659   15.00000    0.01620   24.31619
test_mod(mod_c, k = 4)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##     0.3285     0.6605     0.2078     0.1439    16.0000     0.2592        Inf
test_mod(mod_alpha, k = 5)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.025671   0.485239   0.007565   0.019305  15.000000   0.023262        Inf
test_mod(mod_omega, k = 6)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##  0.0011062  0.6139613  0.0219400  0.3405360 16.0000000  0.0005402        Inf
test_mod(mod_miu, k = 7)
## loocv_rmse     adj_r2 bp_pval.BP    sw_pval num_params  test_rmse   perc_err 
##   0.017161   0.894881   0.352920   0.928797  20.000000   0.009206  41.837402

Model Diagnostics (Plots)

diagnostics(mod_k, testit = FALSE)
[Figure 1.1. Model `k`]

[Figure 1.1. Model k]

diagnostics(mod_lamda, testit = FALSE)
[Figure 1.2. Model `lamda`]

[Figure 1.2. Model lamda]

diagnostics(mod_c, testit = FALSE)
[Figure 1.3. Model `c`]

[Figure 1.3. Model c]

diagnostics(mod_alpha, testit = FALSE)
[Figure 1.4. Model `alpha`]

[Figure 1.4. Model alpha]

diagnostics(mod_omega, testit = FALSE)
[Figure 1.5. Model `omega`]

[Figure 1.5. Model omega]

diagnostics(mod_miu, testit = FALSE)
[Figure 1.6. Model `miu`]

[Figure 1.6. Model miu]

Discussion

See project Homepage.

Appendix

Helper Functions

# Turn the p-value reported by bptest() for `model` into a decision label.
# (bptest() is presumably lmtest's Breusch-Pagan test -- confirm against the
# packages loaded earlier in the report.)
#
# model: fitted model object, handed to bptest() unchanged.
# alpha: significance level the p-value is compared against.
#
# Returns "Reject" when the p-value is below `alpha`, otherwise
# "Fail to Reject". Names carried by the p-value are dropped.
get_bp_decision = function(model, alpha) {
  bp_result = bptest(model)
  is_significant = unname(bp_result$p.value < alpha)
  ifelse(is_significant, yes = "Reject", no = "Fail to Reject")
}

# Return the p-value stored in the result of bptest() for `model`.
# The value is passed through untouched, so any name it carries is kept.
get_bp_pval = function(model) {
  bp_result = bptest(model)
  bp_result$p.value
}

# Turn the Shapiro-Wilk p-value of the model residuals into a decision label.
#
# model: fitted model object supporting resid().
# alpha: significance level the p-value is compared against.
#
# Returns "Reject" when the p-value is below `alpha`, otherwise
# "Fail to Reject".
get_sw_decision = function(model, alpha) {
  residual_values = resid(model)
  sw_p_value = shapiro.test(residual_values)$p.value
  is_significant = unname(sw_p_value < alpha)
  ifelse(is_significant, yes = "Reject", no = "Fail to Reject")
}

# Return the p-value of shapiro.test() applied to the residuals of `model`.
get_sw_pval = function(model) {
  sw_result = shapiro.test(resid(model))
  sw_result$p.value
}

# Count the coefficients returned by coef() for `model`
# (the intercept, when present, is one of them).
get_num_params = function(model) {
  estimates = coef(model)
  length(estimates)
}

# Leave-one-out style RMSE of a fitted model, computed from the
# residual / (1 - hat value) identity rather than by refitting.
#
# model:  fitted model supporting resid(), fitted() and hatvalues().
# is_log: single TRUE/FALSE flag. When TRUE the fitted values are
#         back-transformed with exp() and compared with column `k` of `data`;
#         entries that come out NA are dropped before averaging. When FALSE
#         the model's own residuals are used and `k`/`data` are never read.
# k:      name or position of the response column in `data`.
# data:   data frame holding the observed response; defaults to the global
#         `data_trn`, so existing three-argument calls keep working.
#
# Returns a single numeric value.
#
# NOTE(review): on the back-transformed scale (is_log = TRUE) the identity is
# only an approximation of the true leave-one-out error.
get_loocv_rmse = function(model, is_log, k, data = data_trn) {
  # The flag selects one of two formulas, so it has to be a single non-missing
  # value; fail early with a clear message instead of returning NA or a vector.
  stopifnot(length(is_log) == 1L, !is.na(is_log))

  leverage = hatvalues(model)
  if (is_log) {
    # `[[` returns a plain vector for data frames and tibbles alike, whereas
    # `[, k]` yields a one-column tibble when `data` is a tibble.
    loo_resid = (data[[k]] - exp(fitted(model))) / (1 - leverage)
    sqrt(mean(loo_resid ^ 2, na.rm = TRUE))
  } else {
    loo_resid = resid(model) / (1 - leverage)
    sqrt(mean(loo_resid ^ 2))
  }
}

# Return the adjusted R-squared reported by summary() for `model`.
get_adj_r2 = function(model) {
  model_summary = summary(model)
  model_summary$adj.r.squared
}

# Collect the summary metrics for one fitted model into a named numeric vector.
#
# model:  fitted model to evaluate.
# is_log: forwarded to get_loocv_rmse() only.
#         NOTE(review): the test-set metrics below do not receive this flag --
#         confirm that is intended for log-response models.
# k:      response column (name or position), forwarded to the LOOCV and
#         test-set helpers.
#
# Returns c(loocv_rmse, adj_r2, bp_pval, sw_pval, num_params, test_rmse,
# perc_err); a name carried by an individual value is appended by c().
test_mod = function(model, is_log = FALSE, k = 1){
  loocv_value = get_loocv_rmse(model, is_log, k)
  adj_r2_value = get_adj_r2(model)
  bp_value = get_bp_pval(model)
  sw_value = get_sw_pval(model)
  param_count = get_num_params(model)
  rmse_value = get_test_rmse(model, k)
  perc_value = get_perc_err(model, k)

  c(loocv_rmse = loocv_value,
    adj_r2 = adj_r2_value,
    bp_pval = bp_value,
    sw_pval = sw_value,
    num_params = param_count,
    test_rmse = rmse_value,
    perc_err = perc_value)
}

# Residual diagnostics for a fitted model: optional plots plus an optional
# Shapiro-Wilk normality test on the residuals.
#
# model:  fitted model supporting fitted() and resid().
# pcol:   point colour used in both plots.
# lcol:   colour of the reference line in the normal Q-Q plot.
# alpha:  significance level for the Shapiro-Wilk decision.
# plotit: when TRUE, draw a fitted-vs-residuals plot and a normal Q-Q plot
#         side by side.
# testit: when TRUE, return list(p_val, decision) where decision is "Reject"
#         if the p-value is below `alpha` and "Fail to Reject" otherwise.
#         When FALSE, NULL is returned invisibly.
diagnostics = function(model, pcol = "grey", lcol = "dodgerblue", alpha = 0.05, plotit = TRUE, testit = TRUE){
  model_resid = resid(model)

  if (plotit){
    # Put the caller's graphics settings back on exit so the 1 x 2 square
    # layout does not leak into later plots on the same device.
    old_par = par(mfrow = c(1, 2), pty = "s")
    on.exit(par(old_par), add = TRUE)

    # Honour `pcol` here as well; this plot previously always used "grey".
    plot(fitted(model), model_resid, col = pcol, pch = 20,
         xlab = "Fitted", ylab = "Residual",
         main = "Fitted versus Residuals")
    abline(h = 0, col = "darkorange", lwd = 2)

    qqnorm(model_resid, col = pcol)
    qqline(model_resid, col = lcol, lwd = 2)
  }
  if (testit){
    # Run the test once and read the element by its full name rather than
    # relying on partial matching of `$p`.
    sw_p_value = shapiro.test(model_resid)$p.value
    list(p_val = sw_p_value,
         decision = ifelse(test = sw_p_value < alpha,
                           yes = "Reject", no = "Fail to Reject"))
  }
}

# Root-mean-square error of the model's predictions on held-out data.
#
# model: fitted model supporting predict(newdata = ).
# k:     name or position of the observed response column in `data`.
# data:  data frame to predict on; defaults to the global `data_tst`, so
#        existing two-argument calls keep working.
#
# Returns a single numeric value. Predictions are compared with column `k` as
# returned by predict(), with no back-transformation.
get_test_rmse = function(model, k, data = data_tst) {
  # `[[` returns a plain vector for data frames and tibbles alike, whereas
  # `[, k]` yields a one-column tibble when `data` is a tibble.
  actual = data[[k]]
  predicted = predict(model, newdata = data)
  sqrt(mean((actual - predicted) ^ 2))
}

# Mean absolute percentage error (in percent) of the model's predictions on
# held-out data.
#
# model: fitted model supporting predict(newdata = ).
# k:     name or position of the observed response column in `data`.
# data:  data frame to predict on; defaults to the global `data_tst`, so
#        existing two-argument calls keep working.
#
# Returns a single numeric value. The measure is undefined when an observed
# value is exactly zero: such rows make the result Inf (or NaN when the
# prediction is also exactly zero).
get_perc_err = function(model, k, data = data_tst) {
  # `[[` returns a plain vector for data frames and tibbles alike.
  actual = data[[k]]
  predicted = predict(model, newdata = data)
  # Divide by the magnitude of the observed value so that negative actuals
  # contribute a positive relative error instead of cancelling others out.
  100 * mean(abs(actual - predicted) / abs(actual))
}