Project 2

library(readr)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1     ✔ dplyr   0.8.3
## ✔ tibble  2.1.3     ✔ stringr 1.4.0
## ✔ tidyr   1.0.0     ✔ forcats 0.4.0
## ✔ purrr   0.3.3
## ── Conflicts ──────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidyr)
library(knitr)
library(dplyr)
library(ggplot2)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(plotROC)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 3.0-1

0) My Dataset

combined_data <- readr::read_csv("https://raw.githubusercontent.com/5harad/openpolicing/master/results/data_for_figures/combined_data.csv")
## Parsed with column specification:
## cols(
##   location = col_character(),
##   state = col_character(),
##   driver_race = col_character(),
##   stops_per_year = col_double(),
##   stop_rate = col_double(),
##   search_rate = col_double(),
##   consent_search_rate = col_double(),
##   arrest_rate = col_double(),
##   citation_rate_speeding_stops = col_double(),
##   hit_rate = col_double(),
##   inferred_threshold = col_double()
## )
# Keep the three identifier columns and the five numeric measures (columns
# 1-8), drop incomplete rows, and derive the two consent-search indicators in
# place. Building them with mutate() avoids joining the table back onto itself
# by floating-point columns, which would duplicate rows whenever a key
# combination repeats.
open_policing <- combined_data %>%
  dplyr::select(1:8) %>%
  na.omit() %>%
  mutate(
    # 0 when no consent searches were recorded, 1 otherwise
    consent_search_bin = if_else(consent_search_rate == 0, 0, 1),
    # the same indicator as a "no"/"yes" label
    consent_search_cat = if_else(consent_search_rate == 0, "no", "yes")
  )

1) MANOVA Testing

MANOVA

# One-way MANOVA of the five numeric measures on driver race. The model is
# fitted on combined_data; rows with missing values are dropped by the fit
# itself (reported at the end of the ANOVA output).
manova_data <- manova(
  cbind(
    stops_per_year,
    stop_rate,
    search_rate,
    consent_search_rate,
    arrest_rate
  ) ~ driver_race,
  data = combined_data
)

summary(manova_data)
##              Df Pillai approx F num Df den Df    Pr(>F)    
## driver_race   2 0.4135   30.651     10   1176 < 2.2e-16 ***
## Residuals   591                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA

# Follow-up univariate ANOVAs: one table per response of the MANOVA fit.
summary.aov(manova_data)
##  Response stops_per_year :
##              Df     Sum Sq    Mean Sq F value    Pr(>F)    
## driver_race   2 4.2621e+09 2131054561   77.54 < 2.2e-16 ***
## Residuals   591 1.6243e+10   27483191                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response stop_rate :
##              Df  Sum Sq Mean Sq F value   Pr(>F)   
## driver_race   2   20.55  10.275   5.486 0.004358 **
## Residuals   591 1106.95   1.873                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response search_rate :
##              Df   Sum Sq    Mean Sq F value    Pr(>F)    
## driver_race   2 0.003109 0.00155452  30.773 1.934e-13 ***
## Residuals   591 0.029855 0.00005052                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response consent_search_rate :
##              Df    Sum Sq    Mean Sq F value    Pr(>F)    
## driver_race   2 0.0003719 1.8597e-04  22.928 2.571e-10 ***
## Residuals   591 0.0047936 8.1110e-06                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response arrest_rate :
##              Df  Sum Sq  Mean Sq F value    Pr(>F)    
## driver_race   2 0.08748 0.043741  65.998 < 2.2e-16 ***
## Residuals   591 0.39169 0.000663                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 2094 observations deleted due to missingness

Means of Each Group Across the Numerical Variables

# Mean of each numeric measure within each driver race.
open_policing %>%
  group_by(driver_race) %>%
  summarize(
    mean(stops_per_year),
    mean(stop_rate),
    mean(search_rate),
    mean(consent_search_rate),
    mean(arrest_rate)
  )
## # A tibble: 3 x 6
##   driver_race `mean(stops_per… `mean(stop_rate… `mean(search_ra…
##   <chr>                  <dbl>            <dbl>            <dbl>
## 1 Black                  1964.            0.606          0.00894
## 2 Hispanic               1594.            0.236          0.00971
## 3 White                  7444.            0.189          0.00453
## # … with 2 more variables: `mean(consent_search_rate)` <dbl>,
## #   `mean(arrest_rate)` <dbl>

Post Hoc test for Stops per Year

# Pairwise t tests between races for stops_per_year, no p-value adjustment.
pairwise.t.test(
  open_policing$stops_per_year,
  open_policing$driver_race,
  p.adjust.method = "none"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  open_policing$stops_per_year and open_policing$driver_race 
## 
##          Black  Hispanic
## Hispanic 0.48   -       
## White    <2e-16 <2e-16  
## 
## P value adjustment method: none

Post Hoc test for Stop Rate

# Pairwise t tests between races for stop_rate, no p-value adjustment.
pairwise.t.test(
  open_policing$stop_rate,
  open_policing$driver_race,
  p.adjust.method = "none"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  open_policing$stop_rate and open_policing$driver_race 
## 
##          Black  Hispanic
## Hispanic 0.0075 -       
## White    0.0026 0.7310  
## 
## P value adjustment method: none

Post Hoc test for Search Rate

# Pairwise t tests between races for search_rate, no p-value adjustment.
pairwise.t.test(
  open_policing$search_rate,
  open_policing$driver_race,
  p.adjust.method = "none"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  open_policing$search_rate and open_policing$driver_race 
## 
##          Black   Hispanic
## Hispanic 0.28    -       
## White    1.2e-09 1.2e-12 
## 
## P value adjustment method: none

Post Hoc test for Consent Search Rate

# Pairwise t tests between races for consent_search_rate, no p-value
# adjustment.
pairwise.t.test(
  open_policing$consent_search_rate,
  open_policing$driver_race,
  p.adjust.method = "none"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  open_policing$consent_search_rate and open_policing$driver_race 
## 
##          Black   Hispanic
## Hispanic 0.17    -       
## White    5.9e-07 2.7e-10 
## 
## P value adjustment method: none

Post Hoc test for Arrest Rate

# Pairwise t tests between races for arrest_rate, no p-value adjustment.
pairwise.t.test(
  open_policing$arrest_rate,
  open_policing$driver_race,
  p.adjust.method = "none"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  open_policing$arrest_rate and open_policing$driver_race 
## 
##          Black   Hispanic
## Hispanic 1.7e-11 -       
## White    7.3e-06 < 2e-16 
## 
## P value adjustment method: none

Discussion

In total, 21 tests were performed; 1 MANOVA, 5 ANOVA, and 15 post hoc t tests. 0.05/21 = 0.002380952, which is the new level of significance that we will be looking at.

The MANOVA result is significant (p < 2.2e-16), so we follow up with an individual ANOVA of each numerical variable against driver_race to see which variables differ between the groups.

The ANOVAs were significant for every numerical variable except one, stop_rate (p = 0.004358, which is above the adjusted threshold of 0.00238). 3 post hoc tests were then run for each of the 5 numerical variables to see which groups differ significantly within that variable. But we only look at 4 of the numerical variables, since one of them is not statistically significant.

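In stops_per_year, significant differences were seen between Whites and Blacks and between Whites and Hispanics.

In search_rate, significant differences were seen between Whites and Blacks and between Whites and Hispanics.

In consent_search_rate, significant differences were seen between Whites and Blacks and between Whites and Hispanics.

In arrest_rate, significant differences were seen between Whites and Blacks, between Whites and Hispanics, and between Blacks and Hispanics.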

With regard to the assumptions of the MANOVA, they have most likely been met, but it is possible that they were not. This is because the data collected by the project are a compilation of all the data that the project could obtain, not random samples of all the traffic stops recorded by each state. The post hoc tests show that many of the numerical variables differ significantly for Whites versus Blacks and Hispanics, which may indicate some non-normality in the data. However, there do not seem to be any extreme outliers or extreme multicollinearity among the variables.

2) Randomization Test

Finding Mean of Distribution

# Observed difference in mean arrest_rate between the consent_search_bin
# groups (group 1 minus group 0).
open_policing %>%
  group_by(consent_search_bin) %>%
  summarize(m = mean(arrest_rate)) %>%
  summarize(diff(m))
## # A tibble: 1 x 1
##   `diff(m)`
##       <dbl>
## 1   0.00509

Random Distribution and p value Result

# Randomization test for the difference in mean arrest_rate between the two
# consent_search_bin groups.
set.seed(1234) # make the permutation p-value reproducible

# Observed statistic, computed from the data rather than pasted in by hand.
obs_diff <- mean(open_policing$arrest_rate[open_policing$consent_search_bin == 1]) -
  mean(open_policing$arrest_rate[open_policing$consent_search_bin == 0])

n_perm <- 5000
rand_dist <- numeric(n_perm) # preallocated instead of grown inside the loop
for (i in seq_len(n_perm)) {
  # Shuffle arrest_rate against the fixed group labels to break any association.
  new <- data.frame(
    arrestrate = sample(open_policing$arrest_rate),
    consentsearchratebin = open_policing$consent_search_bin
  )
  rand_dist[i] <- mean(new[new$consentsearchratebin == "1", ]$arrestrate) -
    mean(new[new$consentsearchratebin == "0", ]$arrestrate)
}

# Two-sided p-value: share of permuted differences at least as extreme (in
# absolute value) as the observed one. Doubling a single tail is only valid
# when the null distribution is symmetric.
mean(abs(rand_dist) >= abs(obs_diff)) #pvalue
## [1] 0.0324

Visualization of the Random Distribution

# Histogram of the permuted differences with the observed difference in red.
{
  hist(rand_dist, main = "", ylab = "")
  abline(v = 0.005091409, col = "red")
}

Discussion

The randomization test shows that the null distribution of the mean difference is approximately normal, with a slight skew towards the lower values. The observed difference in means is large enough to indicate a significant difference between the arrest rates of drivers who did and did not have consent searches. This is supported by the p value of 0.0324, which is < 0.05. The observed mean difference (the red line in the histogram) also lies far from the center of the null distribution, further supporting significance.

3) Linear Regression Model

Interpret Coefficient Estimates

# Mean-center stop_rate, then regress arrest_rate on the consent-search label,
# the centered stop rate, and their interaction.
open_policing$stop_rate_c <- with(open_policing, stop_rate - mean(stop_rate))
fit_3 <- lm(
  arrest_rate ~ consent_search_cat * stop_rate_c,
  data = open_policing
)
summary(fit_3)
## 
## Call:
## lm(formula = arrest_rate ~ consent_search_cat * stop_rate_c, 
##     data = open_policing)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033275 -0.017784 -0.008089  0.008261  0.171565 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        0.0280696  0.0020211  13.888   <2e-16 ***
## consent_search_catyes              0.0046618  0.0024761   1.883   0.0602 .  
## stop_rate_c                       -0.0011040  0.0009124  -1.210   0.2268    
## consent_search_catyes:stop_rate_c -0.0024267  0.0024254  -1.001   0.3175    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0283 on 590 degrees of freedom
## Multiple R-squared:  0.0137, Adjusted R-squared:  0.008681 
## F-statistic: 2.731 on 3 and 590 DF,  p-value: 0.04313

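For observations with no consent searches (the reference level), a one-unit increase in stop_rate_c is associated with an average decrease of 0.0011040 in arrest_rate. At the mean stop rate (stop_rate_c = 0), observations with consent searches have an arrest_rate that is 0.0046618 higher on average than those without. The slope of stop_rate_c is 0.0024267 lower (more negative) for observations with consent searches than for those without. None of these three coefficients is significant at the 0.05 level.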

Plot Regression

# Scatterplot of arrest_rate against stop_rate with one fitted line per
# consent_search_cat group.
ggplot(
  open_policing,
  aes(x = stop_rate, y = arrest_rate, group = consent_search_cat)
) +
  geom_point(aes(color = consent_search_cat)) +
  geom_smooth(
    aes(color = consent_search_cat),
    method = "lm",
    se = FALSE,
    fullrange = TRUE
  ) +
  theme(legend.position = c(.9, .19)) +
  xlab("")

Check Assumptions

# Residuals and fitted values of the interaction model.
resids <- resid(fit_3)
fitvals <- fitted(fit_3)

# Residuals vs fitted values, with a reference line at zero.
ggplot() +
  geom_point(aes(fitvals, resids)) +
  geom_hline(yintercept = 0, color = "red")

# Distribution of the residuals.
ggplot() +
  geom_histogram(aes(resids), bins = 20)

# Raw relationship between stop_rate and arrest_rate, colored by group.
ggplot(open_policing, aes(stop_rate, arrest_rate, color = consent_search_cat)) +
  geom_point()

Regression Results with Robust Standard Error

# Estimates and standard errors using the vcovHC() covariance matrix.
coeftest(fit_3, vcov = vcovHC(fit_3))[, 1:2]
##                                       Estimate  Std. Error
## (Intercept)                        0.028069625 0.002587965
## consent_search_catyes              0.004661783 0.002842918
## stop_rate_c                       -0.001103971 0.002490565
## consent_search_catyes:stop_rate_c -0.002426686 0.002696296
# The same estimates with the default standard errors, for comparison.
coeftest(fit_3)[, 1:2]
##                                       Estimate   Std. Error
## (Intercept)                        0.028069625 0.0020211369
## consent_search_catyes              0.004661783 0.0024760553
## stop_rate_c                       -0.001103971 0.0009124088
## consent_search_catyes:stop_rate_c -0.002426686 0.0024254340

Overall, the standard errors before and after the robust adjustment were nearly identical except for the stop_rate_c variable. The robust adjustment increased that standard error, making the p value for that variable larger and reducing that variable's chance of being statistically significant.

Proportion of Variance in Outcome Explained by Model

# Multiple R-squared of the interaction model (full element name rather than
# relying on partial matching of `$`).
summary(fit_3)$r.squared
## [1] 0.01369648

The proportion of the variation in the outcome that my model explains is 0.01369648.

Rerun the Regression but without Interactions

# Main-effects-only version of the regression (no interaction term).
fit_4 <- lm(
  arrest_rate ~ consent_search_cat + stop_rate_c,
  data = open_policing
)
summary(fit_4)
## 
## Call:
## lm(formula = arrest_rate ~ consent_search_cat + stop_rate_c, 
##     data = open_policing)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033110 -0.017775 -0.008275  0.008330  0.171400 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.0281213  0.0020205  13.918   <2e-16 ***
## consent_search_catyes  0.0047656  0.0024739   1.926   0.0545 .  
## stop_rate_c           -0.0014474  0.0008454  -1.712   0.0874 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0283 on 591 degrees of freedom
## Multiple R-squared:  0.01202,    Adjusted R-squared:  0.00868 
## F-statistic: 3.596 on 2 and 591 DF,  p-value: 0.02803

4) Bootstrapped Standard Errors

# Original estimates and standard errors, for comparison with the bootstrap.
coeftest(fit_3)[, 1:2]
##                                       Estimate   Std. Error
## (Intercept)                        0.028069625 0.0020211369
## consent_search_catyes              0.004661783 0.0024760553
## stop_rate_c                       -0.001103971 0.0009124088
## consent_search_catyes:stop_rate_c -0.002426686 0.0024254340
# Bootstrap: resample rows with replacement 5000 times, refit the interaction
# model on each resample, and keep its coefficients (one column per resample).
samp_distn <- replicate(5000, {
  resampled <- open_policing[sample(nrow(open_policing), replace = TRUE), ]
  refit <- lm(arrest_rate ~ consent_search_cat * stop_rate_c, data = resampled)
  coef(refit)
})
# Bootstrapped standard error = SD of each coefficient across resamples.
samp_distn %>%
  t() %>%
  as.data.frame() %>%
  summarize_all(sd)
##   (Intercept) consent_search_catyes stop_rate_c
## 1 0.002587852            0.00284409 0.004064892
##   consent_search_catyes:stop_rate_c
## 1                       0.004306461

5) Logistic Regression Model

Interpret Coefficient Estimates

# Logistic regression of the binary consent-search indicator on stops_per_year
# and stop_rate.
fit_5 <- glm(
  consent_search_bin ~ stops_per_year + stop_rate,
  data = open_policing,
  family = binomial(link = "logit")
)

coeftest(fit_5)
## 
## z test of coefficients:
## 
##                   Estimate  Std. Error z value  Pr(>|z|)    
## (Intercept)     5.8475e-01  1.1188e-01  5.2266 1.726e-07 ***
## stops_per_year  4.5829e-05  1.9649e-05  2.3324   0.01968 *  
## stop_rate      -1.1041e-01  8.4563e-02 -1.3057   0.19167    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficients on the log-odds scale, rounded to 3 decimals.
coef(fit_5) %>%
  round(3) %>%
  data.frame()
##                     .
## (Intercept)     0.585
## stops_per_year  0.000
## stop_rate      -0.110
# Exponentiated coefficients (odds scale), rounded to 3 decimals.
exp(coef(fit_5)) %>%
  round(3) %>%
  data.frame()
##                    .
## (Intercept)    1.795
## stops_per_year 1.000
## stop_rate      0.895

Report Confusion Matrix

# Predicted probabilities from fit_5, classified at a 0.5 cutoff and
# cross-tabulated against the true labels (rows = truth, columns = prediction).
prob <- predict(fit_5, type = "response")
pred <- ifelse(prob > .5, 1, 0)
addmargins(
  table(truth = open_policing$consent_search_bin, prediction = pred)
)
##      prediction
## truth   0   1 Sum
##   0     2 195 197
##   1     1 396 397
##   Sum   3 591 594

Accuracy, Sensitivity, Specificity, Recall

# Counts read from the confusion matrix (rows = truth, columns = prediction):
#   TN = 2, FP = 195, FN = 1, TP = 396, N = 594
# Accuracy: (TP + TN) / N
(396+2)/594
## [1] 0.6700337
# Sensitivity (TPR), also called recall: TP / (TP + FN)
396/397
## [1] 0.9974811
# Specificity (TNR): TN / (TN + FP). The denominator is the 197 actual
# negatives, not the 3 predicted negatives.
2/197
## [1] 0.01015228
# Precision (PPV): TP / (TP + FP)
396/591
## [1] 0.6700508

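The accuracy was 0.6700337, the sensitivity (recall) 0.9974811, and the precision 0.6700508. The specificity, taken from the confusion matrix as the share of true 0s that were predicted as 0 (2 out of 197), was only about 0.0102. Overall, the sensitivity of the model was really high, but the accuracy and precision were much lower, and the model almost never identifies the negative class.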

Plot Density of log-odds by Binary Outcome Variable

# First principal component of the two predictors, used as a single predictor
# in a second logistic model.
pca1 <- princomp(open_policing[c("stop_rate", "stops_per_year")])
open_policing$predictor <- pca1$scores[, 1]

fit_6 <- glm(
  consent_search_bin ~ predictor,
  data = open_policing,
  family = "binomial"
)
# NOTE(review): `prob` comes from fit_6 (the PCA model) while `logit` below
# comes from fit_5 (the two-predictor model) -- confirm this mix is intended.
open_policing$prob <- predict(fit_6, type = "response")

open_policing$logit <- predict(fit_5)

# Density of the fit_5 log-odds by true class, with the logit = 0 cutoff.
ggplot(open_policing, aes(logit, fill = consent_search_cat)) +
  geom_density(alpha = 0.3) +
  geom_vline(xintercept = 0, lty = 2)

ROC Curve and AUC

# Sensitivity at cutoff p: share of rows with consent_search_bin == 1 whose
# predicted probability (`prob` column) exceeds p.
#
# p:    probability cutoff.
# data: data frame with `consent_search_bin` and `prob` columns. The function
#       now reads this argument instead of always using the global
#       open_policing.
# y:    unused; kept so existing calls keep working.
sens <- function(p, data = open_policing, y = consent_search_bin) {
  mean(data[data$consent_search_bin == 1, ]$prob > p)
}

# Specificity at cutoff p: share of rows with consent_search_bin == 0 whose
# predicted probability (`prob` column) is below p. Arguments as for sens().
spec <- function(p, data = open_policing, y = consent_search_bin) {
  mean(data[data$consent_search_bin == 0, ]$prob < p)
}

# Sensitivity and specificity at every cutoff from 0 to 1 in steps of 0.01.
sensitivity <- vapply(seq(0, 1, .01), sens, numeric(1), open_policing)
specificity <- vapply(seq(0, 1, .01), spec, numeric(1), open_policing)

# One row per cutoff; TPR/FPR are the coordinates of the ROC curve.
ROC1 <- data.frame(
  sensitivity,
  specificity,
  cutoff = seq(0, 1, .01),
  TPR = sensitivity,
  FPR = 1 - specificity
)

# ROC curve with the diagonal as a reference line.
ROC1 %>%
  ggplot(aes(FPR, TPR)) +
  geom_path(size = 1.5) +
  geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), lty = 2) +
  scale_x_continuous(limits = c(0, 1))

10-fold CV and Report Accuracy, Sensitivity, and Recall

# Classification diagnostics at a 0.5 cutoff, plus the area under the ROC
# curve.
#
# probs: numeric vector of predicted probabilities for the positive class.
# truth: true labels aligned with `probs`: numeric 0/1, logical, or a
#        two-level factor / character vector (second level = positive class).
#
# Returns a one-row data.frame with columns acc, sens, spec, ppv and auc.
# A metric whose denominator is zero (e.g. spec when `truth` contains no
# negatives) is returned as NaN rather than raising an error.
class_diag <- function(probs, truth) {
  # Recode truth to numeric 0/1 *before* tabulating, so the confusion table
  # and the ROC computation below use the same coding.
  if (!is.numeric(truth) && !is.logical(truth)) {
    truth <- as.numeric(as.factor(truth)) - 1
  }
  truth <- as.numeric(truth)

  # Fix both dimensions to two levels so `tab` is always 2 x 2, even when all
  # labels belong to one class or all predictions fall on one side of 0.5.
  tab <- table(
    factor(probs > .5, levels = c("FALSE", "TRUE")),
    factor(truth, levels = c(0, 1))
  )
  acc <- sum(diag(tab)) / sum(tab)
  sens <- tab[2, 2] / colSums(tab)[2]
  spec <- tab[1, 1] / colSums(tab)[1]
  ppv <- tab[2, 2] / rowSums(tab)[2]

  # ROC points: walk through the observations from highest to lowest
  # predicted probability, accumulating true and false positive rates.
  ord <- order(probs, decreasing = TRUE)
  probs <- probs[ord]
  truth <- truth[ord]
  TPR <- cumsum(truth) / max(1, sum(truth))
  FPR <- cumsum(!truth) / max(1, sum(!truth))

  # Within a run of tied probabilities keep only the last point, then anchor
  # the curve at (0, 0) and (1, 1).
  dup <- c(probs[-1] >= probs[-length(probs)], FALSE)
  TPR <- c(0, TPR[!dup], 1)
  FPR <- c(0, FPR[!dup], 1)

  # Trapezoidal area under the curve.
  n <- length(TPR)
  auc <- sum(((TPR[-1] + TPR[-n]) / 2) * (FPR[-1] - FPR[-n]))

  data.frame(acc, sens, spec, ppv, auc)
}
# 10-fold cross-validation of the two-predictor logistic model.
set.seed(1234)
k <- 10
# Shuffle the rows, then assign consecutive blocks of rows to folds 1..k.
data_5 <- open_policing[sample(nrow(open_policing)), ]
folds <- cut(seq_len(nrow(open_policing)), breaks = k, labels = FALSE)
diags <- NULL
for (i in 1:k) {
  # Fold i is held out for testing; the model is trained on the rest.
  train_5 <- data_5[folds != i, ]
  test_5 <- data_5[folds == i, ]
  truth_5 <- test_5$consent_search_bin

  fit_7 <- glm(
    consent_search_bin ~ stops_per_year + stop_rate,
    data = train_5,
    family = "binomial"
  )
  probs_5 <- predict(fit_7, newdata = test_5, type = "response")

  diags <- rbind(diags, class_diag(probs_5, truth_5))
}

# Average each diagnostic across the k folds.
vapply(diags, mean, numeric(1))
##        acc       sens       spec        ppv        auc 
## 0.66827684 0.99478320 0.00819398 0.66911942 0.59093790

6) LASSO Regression

# Convert the categorical columns to factors so they are dummy-coded in the
# model matrix.
open_policing <- open_policing %>%
  mutate(
    location = factor(location),
    state = factor(state),
    driver_race = factor(driver_race),
    consent_search_cat = factor(consent_search_cat)
  )

# Logistic model without an intercept on every candidate predictor.
# NOTE(review): consent_search_bin is derived from consent_search_rate, which
# is also a predictor here, so the outcome is perfectly separable -- presumably
# the source of the convergence warnings; confirm whether it should be
# excluded.
fit_lasso <- glm(
  consent_search_bin ~ -1 + location + state + driver_race + stops_per_year +
    stop_rate + search_rate + consent_search_rate + arrest_rate,
  data = open_policing,
  family = "binomial"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model.matrix(fit_lasso) %>% head()
##   locationA1 locationA2 locationA3 locationA4 locationA5 locationA6 locationA7
## 1          0          0          0          0          0          0          0
## 2          0          0          0          0          0          0          0
## 3          0          0          0          0          0          0          0
## 4          0          0          0          0          0          0          0
## 5          0          0          0          0          0          0          0
## 6          0          0          0          0          0          0          0
##   locationA8 locationADAMS COUNTY locationALACHUA COUNTY locationALAMOSA COUNTY
## 1          0                    1                      0                      0
## 2          0                    1                      0                      0
## 3          0                    1                      0                      0
## 4          0                    0                      0                      1
## 5          0                    0                      0                      1
## 6          0                    0                      0                      1
##   locationARAPAHOE COUNTY locationARCHULETA COUNTY locationB2 locationB3
## 1                       0                        0          0          0
## 2                       0                        0          0          0
## 3                       0                        0          0          0
## 4                       0                        0          0          0
## 5                       0                        0          0          0
## 6                       0                        0          0          0
##   locationB4 locationB5 locationB6 locationB7 locationB8 locationBACA COUNTY
## 1          0          0          0          0          0                   0
## 2          0          0          0          0          0                   0
## 3          0          0          0          0          0                   0
## 4          0          0          0          0          0                   0
## 5          0          0          0          0          0                   0
## 6          0          0          0          0          0                   0
##   locationBAKER COUNTY locationBARNSTABLE COUNTY locationBAY COUNTY
## 1                    0                         0                  0
## 2                    0                         0                  0
## 3                    0                         0                  0
## 4                    0                         0                  0
## 5                    0                         0                  0
## 6                    0                         0                  0
##   locationBENT COUNTY locationBERKSHIRE COUNTY locationBOULDER COUNTY
## 1                   0                        0                      0
## 2                   0                        0                      0
## 3                   0                        0                      0
## 4                   0                        0                      0
## 5                   0                        0                      0
## 6                   0                        0                      0
##   locationBRADFORD COUNTY locationBREVARD COUNTY locationBRISTOL COUNTY
## 1                       0                      0                      0
## 2                       0                      0                      0
## 3                       0                      0                      0
## 4                       0                      0                      0
## 5                       0                      0                      0
## 6                       0                      0                      0
##   locationBROOMFIELD COUNTY locationBROWARD COUNTY locationC1 locationC2
## 1                         0                      0          0          0
## 2                         0                      0          0          0
## 3                         0                      0          0          0
## 4                         0                      0          0          0
## 5                         0                      0          0          0
## 6                         0                      0          0          0
##   locationC3 locationC4 locationC5 locationC6 locationC7 locationC8
## 1          0          0          0          0          0          0
## 2          0          0          0          0          0          0
## 3          0          0          0          0          0          0
## 4          0          0          0          0          0          0
## 5          0          0          0          0          0          0
## 6          0          0          0          0          0          0
##   locationCALHOUN COUNTY locationCHAFFEE COUNTY locationCHARLOTTE COUNTY
## 1                      0                      0                        0
## 2                      0                      0                        0
## 3                      0                      0                        0
## 4                      0                      0                        0
## 5                      0                      0                        0
## 6                      0                      0                        0
##   locationCHEYENNE COUNTY locationCITRUS COUNTY locationCLAY COUNTY
## 1                       0                     0                   0
## 2                       0                     0                   0
## 3                       0                     0                   0
## 4                       0                     0                   0
## 5                       0                     0                   0
## 6                       0                     0                   0
##   locationCLEAR CREEK COUNTY locationCOLLIER COUNTY locationCOLUMBIA COUNTY
## 1                          0                      0                       0
## 2                          0                      0                       0
## 3                          0                      0                       0
## 4                          0                      0                       0
## 5                          0                      0                       0
## 6                          0                      0                       0
##   locationCONEJOS COUNTY locationCOSTILLA COUNTY locationCROWLEY COUNTY
## 1                      0                       0                      0
## 2                      0                       0                      0
## 3                      0                       0                      0
## 4                      0                       0                      0
## 5                      0                       0                      0
## 6                      0                       0                      0
##   locationCUSTER COUNTY locationD1 locationD2 locationD3 locationD4 locationD5
## 1                     0          0          0          0          0          0
## 2                     0          0          0          0          0          0
## 3                     0          0          0          0          0          0
## 4                     0          0          0          0          0          0
## 5                     0          0          0          0          0          0
## 6                     0          0          0          0          0          0
##   locationD6 locationD7 locationDELTA COUNTY locationDENVER COUNTY
## 1          0          0                    0                     0
## 2          0          0                    0                     0
## 3          0          0                    0                     0
## 4          0          0                    0                     0
## 5          0          0                    0                     0
## 6          0          0                    0                     0
##   locationDESOTO COUNTY locationDIXIE COUNTY locationDOLORES COUNTY
## 1                     0                    0                      0
## 2                     0                    0                      0
## 3                     0                    0                      0
## 4                     0                    0                      0
## 5                     0                    0                      0
## 6                     0                    0                      0
##   locationDOUGLAS COUNTY locationDUKES COUNTY locationDUVAL COUNTY locationE1
## 1                      0                    0                    0          0
## 2                      0                    0                    0          0
## 3                      0                    0                    0          0
## 4                      0                    0                    0          0
## 5                      0                    0                    0          0
## 6                      0                    0                    0          0
##   locationE2 locationE3 locationE4 locationE5 locationE6 locationE7
## 1          0          0          0          0          0          0
## 2          0          0          0          0          0          0
## 3          0          0          0          0          0          0
## 4          0          0          0          0          0          0
## 5          0          0          0          0          0          0
## 6          0          0          0          0          0          0
##   locationEAGLE COUNTY locationEL PASO COUNTY locationELBERT COUNTY
## 1                    0                      0                     0
## 2                    0                      0                     0
## 3                    0                      0                     0
## 4                    0                      0                     0
## 5                    0                      0                     0
## 6                    0                      0                     0
##   locationESCAMBIA COUNTY locationESSEX COUNTY locationF1 locationF2 locationF3
## 1                       0                    0          0          0          0
## 2                       0                    0          0          0          0
## 3                       0                    0          0          0          0
## 4                       0                    0          0          0          0
## 5                       0                    0          0          0          0
## 6                       0                    0          0          0          0
##   locationF4 locationF5 locationFLAGLER COUNTY locationFRANKLIN COUNTY
## 1          0          0                      0                       0
## 2          0          0                      0                       0
## 3          0          0                      0                       0
## 4          0          0                      0                       0
## 5          0          0                      0                       0
## 6          0          0                      0                       0
##   locationFREMONT COUNTY locationG1 locationG2 locationG3 locationG4 locationG5
## 1                      0          0          0          0          0          0
## 2                      0          0          0          0          0          0
## 3                      0          0          0          0          0          0
## 4                      0          0          0          0          0          0
## 5                      0          0          0          0          0          0
## 6                      0          0          0          0          0          0
##   locationG6 locationGADSDEN COUNTY locationGARFIELD COUNTY
## 1          0                      0                       0
## 2          0                      0                       0
## 3          0                      0                       0
## 4          0                      0                       0
## 5          0                      0                       0
## 6          0                      0                       0
##   locationGILCHRIST COUNTY locationGILPIN COUNTY locationGLADES COUNTY
## 1                        0                     0                     0
## 2                        0                     0                     0
## 3                        0                     0                     0
## 4                        0                     0                     0
## 5                        0                     0                     0
## 6                        0                     0                     0
##   locationGRAND COUNTY locationGULF COUNTY locationGUNNISON COUNTY locationH1
## 1                    0                   0                       0          0
## 2                    0                   0                       0          0
## 3                    0                   0                       0          0
## 4                    0                   0                       0          0
## 5                    0                   0                       0          0
## 6                    0                   0                       0          0
##   locationH2 locationH3 locationH4 locationH5 locationH6
## 1          0          0          0          0          0
## 2          0          0          0          0          0
## 3          0          0          0          0          0
## 4          0          0          0          0          0
## 5          0          0          0          0          0
## 6          0          0          0          0          0
##   locationHAMILTON COUNTY locationHAMPDEN COUNTY locationHAMPSHIRE COUNTY
## 1                       0                      0                        0
## 2                       0                      0                        0
## 3                       0                      0                        0
## 4                       0                      0                        0
## 5                       0                      0                        0
## 6                       0                      0                        0
##   locationHARDEE COUNTY locationHENDRY COUNTY locationHERNANDO COUNTY
## 1                     0                     0                       0
## 2                     0                     0                       0
## 3                     0                     0                       0
## 4                     0                     0                       0
## 5                     0                     0                       0
## 6                     0                     0                       0
##   locationHIGHLANDS COUNTY locationHILLSBOROUGH COUNTY locationHINSDALE COUNTY
## 1                        0                           0                       0
## 2                        0                           0                       0
## 3                        0                           0                       0
## 4                        0                           0                       0
## 5                        0                           0                       0
## 6                        0                           0                       0
##   locationHOLMES COUNTY locationHUERFANO COUNTY locationINDIAN RIVER COUNTY
## 1                     0                       0                           0
## 2                     0                       0                           0
## 3                     0                       0                           0
## 4                     0                       0                           0
## 5                     0                       0                           0
## 6                     0                       0                           0
##   locationJACKSON COUNTY locationJEFFERSON COUNTY locationKIOWA COUNTY
## 1                      0                        0                    0
## 2                      0                        0                    0
## 3                      0                        0                    0
## 4                      0                        0                    0
## 5                      0                        0                    0
## 6                      0                        0                    0
##   locationKIT CARSON COUNTY locationLA PLATA COUNTY locationLAFAYETTE COUNTY
## 1                         0                       0                        0
## 2                         0                       0                        0
## 3                         0                       0                        0
## 4                         0                       0                        0
## 5                         0                       0                        0
## 6                         0                       0                        0
##   locationLAKE COUNTY locationLARIMER COUNTY locationLAS ANIMAS COUNTY
## 1                   0                      0                         0
## 2                   0                      0                         0
## 3                   0                      0                         0
## 4                   0                      0                         0
## 5                   0                      0                         0
## 6                   0                      0                         0
##   locationLEE COUNTY locationLEON COUNTY locationLEVY COUNTY
## 1                  0                   0                   0
## 2                  0                   0                   0
## 3                  0                   0                   0
## 4                  0                   0                   0
## 5                  0                   0                   0
## 6                  0                   0                   0
##   locationLIBERTY COUNTY locationLINCOLN COUNTY locationLOGAN COUNTY
## 1                      0                      0                    0
## 2                      0                      0                    0
## 3                      0                      0                    0
## 4                      0                      0                    0
## 5                      0                      0                    0
## 6                      0                      0                    0
##   locationMADISON COUNTY locationMANATEE COUNTY locationMARION COUNTY
## 1                      0                      0                     0
## 2                      0                      0                     0
## 3                      0                      0                     0
## 4                      0                      0                     0
## 5                      0                      0                     0
## 6                      0                      0                     0
##   locationMARTIN COUNTY locationMESA COUNTY locationMIAMI-DADE COUNTY
## 1                     0                   0                         0
## 2                     0                   0                         0
## 3                     0                   0                         0
## 4                     0                   0                         0
## 5                     0                   0                         0
## 6                     0                   0                         0
##   locationMIDDLESEX COUNTY locationMINERAL COUNTY locationMOFFAT COUNTY
## 1                        0                      0                     0
## 2                        0                      0                     0
## 3                        0                      0                     0
## 4                        0                      0                     0
## 5                        0                      0                     0
## 6                        0                      0                     0
##   locationMONROE COUNTY locationMONTEZUMA COUNTY locationMONTROSE COUNTY
## 1                     0                        0                       0
## 2                     0                        0                       0
## 3                     0                        0                       0
## 4                     0                        0                       0
## 5                     0                        0                       0
## 6                     0                        0                       0
##   locationMORGAN COUNTY locationNANTUCKET COUNTY locationNASSAU COUNTY
## 1                     0                        0                     0
## 2                     0                        0                     0
## 3                     0                        0                     0
## 4                     0                        0                     0
## 5                     0                        0                     0
## 6                     0                        0                     0
##   locationNORFOLK COUNTY locationOKALOOSA COUNTY locationOKEECHOBEE COUNTY
## 1                      0                       0                         0
## 2                      0                       0                         0
## 3                      0                       0                         0
## 4                      0                       0                         0
## 5                      0                       0                         0
## 6                      0                       0                         0
##   locationORANGE COUNTY locationOSCEOLA COUNTY locationOTERO COUNTY
## 1                     0                      0                    0
## 2                     0                      0                    0
## 3                     0                      0                    0
## 4                     0                      0                    0
## 5                     0                      0                    0
## 6                     0                      0                    0
##   locationOURAY COUNTY locationPALM BEACH COUNTY locationPARK COUNTY
## 1                    0                         0                   0
## 2                    0                         0                   0
## 3                    0                         0                   0
## 4                    0                         0                   0
## 5                    0                         0                   0
## 6                    0                         0                   0
##   locationPASCO COUNTY locationPHILLIPS COUNTY locationPINELLAS COUNTY
## 1                    0                       0                       0
## 2                    0                       0                       0
## 3                    0                       0                       0
## 4                    0                       0                       0
## 5                    0                       0                       0
## 6                    0                       0                       0
##   locationPITKIN COUNTY locationPLYMOUTH COUNTY locationPOLK COUNTY
## 1                     0                       0                   0
## 2                     0                       0                   0
## 3                     0                       0                   0
## 4                     0                       0                   0
## 5                     0                       0                   0
## 6                     0                       0                   0
##   locationPROWERS COUNTY locationPUEBLO COUNTY locationPUTNAM COUNTY
## 1                      0                     0                     0
## 2                      0                     0                     0
## 3                      0                     0                     0
## 4                      0                     0                     0
## 5                      0                     0                     0
## 6                      0                     0                     0
##   locationRIO BLANCO COUNTY locationRIO GRANDE COUNTY locationROUTT COUNTY
## 1                         0                         0                    0
## 2                         0                         0                    0
## 3                         0                         0                    0
## 4                         0                         0                    0
## 5                         0                         0                    0
## 6                         0                         0                    0
##   locationSAGUACHE COUNTY locationSAN JUAN COUNTY locationSAN MIGUEL COUNTY
## 1                       0                       0                         0
## 2                       0                       0                         0
## 3                       0                       0                         0
## 4                       0                       0                         0
## 5                       0                       0                         0
## 6                       0                       0                         0
##   locationSANTA ROSA COUNTY locationSARASOTA COUNTY locationSEDGWICK COUNTY
## 1                         0                       0                       0
## 2                         0                       0                       0
## 3                         0                       0                       0
## 4                         0                       0                       0
## 5                         0                       0                       0
## 6                         0                       0                       0
##   locationSEMINOLE COUNTY locationST. JOHNS COUNTY locationST. LUCIE COUNTY
## 1                       0                        0                        0
## 2                       0                        0                        0
## 3                       0                        0                        0
## 4                       0                        0                        0
## 5                       0                        0                        0
## 6                       0                        0                        0
##   locationSUFFOLK COUNTY locationSUMMIT COUNTY locationSUMTER COUNTY
## 1                      0                     0                     0
## 2                      0                     0                     0
## 3                      0                     0                     0
## 4                      0                     0                     0
## 5                      0                     0                     0
## 6                      0                     0                     0
##   locationSUWANNEE COUNTY locationTAYLOR COUNTY locationTELLER COUNTY
## 1                       0                     0                     0
## 2                       0                     0                     0
## 3                       0                     0                     0
## 4                       0                     0                     0
## 5                       0                     0                     0
## 6                       0                     0                     0
##   locationUNION COUNTY locationVOLUSIA COUNTY locationWAKULLA COUNTY
## 1                    0                      0                      0
## 2                    0                      0                      0
## 3                    0                      0                      0
## 4                    0                      0                      0
## 5                    0                      0                      0
## 6                    0                      0                      0
##   locationWALTON COUNTY locationWASHINGTON COUNTY locationWELD COUNTY
## 1                     0                         0                   0
## 2                     0                         0                   0
## 3                     0                         0                   0
## 4                     0                         0                   0
## 5                     0                         0                   0
## 6                     0                         0                   0
##   locationWORCESTER COUNTY locationYUMA COUNTY stateFL stateMA stateNC
## 1                        0                   0       0       0       0
## 2                        0                   0       0       0       0
## 3                        0                   0       0       0       0
## 4                        0                   0       0       0       0
## 5                        0                   0       0       0       0
## 6                        0                   0       0       0       0
##   driver_raceHispanic driver_raceWhite stops_per_year stop_rate search_rate
## 1                   0                0            791     0.077       0.005
## 2                   1                0           5913     0.048       0.003
## 3                   0                1          13800     0.069       0.003
## 4                   0                0             80     0.506       0.017
## 5                   1                0           1598     0.296       0.009
## 6                   0                1           2951     0.447       0.004
##   consent_search_rate arrest_rate
## 1               0.002       0.057
## 2               0.001       0.071
## 3               0.001       0.032
## 4               0.010       0.093
## 5               0.003       0.058
## 6               0.001       0.034
# LASSO variable selection for the binary outcome `consent_search_bin`.
# Seed first so the random fold assignment inside cv.glmnet() is reproducible.
set.seed(1234)

# Predictors: the design matrix of the previously fitted `fit_lasso` model,
# centered and scaled column by column.
# NOTE(review): `x` contains a `consent_search_rate` column (it shows up in
# the coefficient table), and `consent_search_bin` was derived from that very
# column (0 vs. non-zero). This looks like target leakage -- confirm whether
# the column should be dropped from `x` before fitting.
x <- scale(model.matrix(fit_lasso))

# Response: the 0/1 indicator as a one-column matrix.
y <- as.matrix(open_policing[["consent_search_bin"]])

# Cross-validate the penalty strength, then refit at the 1-SE lambda.
cv <- cv.glmnet(x, y, family = "binomial")
lasso <- glmnet(x, y, family = "binomial", lambda = cv[["lambda.1se"]])

# Coefficients of the cross-validated fit at lambda.1se (the default `s`).
coef(cv, s = "lambda.1se")
## 205 x 1 sparse Matrix of class "dgCMatrix"
##                                       1
## (Intercept)                 19.83460791
## locationA1                   .         
## locationA2                   .         
## locationA3                   .         
## locationA4                   .         
## locationA5                   .         
## locationA6                   .         
## locationA7                   .         
## locationA8                   .         
## locationADAMS COUNTY         .         
## locationALACHUA COUNTY       .         
## locationALAMOSA COUNTY       .         
## locationARAPAHOE COUNTY      .         
## locationARCHULETA COUNTY     .         
## locationB2                   .         
## locationB3                   .         
## locationB4                   .         
## locationB5                   .         
## locationB6                   .         
## locationB7                   .         
## locationB8                   .         
## locationBACA COUNTY          .         
## locationBAKER COUNTY         .         
## locationBARNSTABLE COUNTY    .         
## locationBAY COUNTY           .         
## locationBENT COUNTY          .         
## locationBERKSHIRE COUNTY     .         
## locationBOULDER COUNTY       .         
## locationBRADFORD COUNTY      .         
## locationBREVARD COUNTY       .         
## locationBRISTOL COUNTY       .         
## locationBROOMFIELD COUNTY    .         
## locationBROWARD COUNTY       .         
## locationC1                   .         
## locationC2                   .         
## locationC3                   .         
## locationC4                   .         
## locationC5                   .         
## locationC6                   .         
## locationC7                   .         
## locationC8                   .         
## locationCALHOUN COUNTY       .         
## locationCHAFFEE COUNTY       .         
## locationCHARLOTTE COUNTY     .         
## locationCHEYENNE COUNTY      .         
## locationCITRUS COUNTY        .         
## locationCLAY COUNTY          .         
## locationCLEAR CREEK COUNTY   .         
## locationCOLLIER COUNTY       .         
## locationCOLUMBIA COUNTY      .         
## locationCONEJOS COUNTY       .         
## locationCOSTILLA COUNTY      .         
## locationCROWLEY COUNTY       .         
## locationCUSTER COUNTY        .         
## locationD1                   .         
## locationD2                   .         
## locationD3                   .         
## locationD4                   .         
## locationD5                   .         
## locationD6                   .         
## locationD7                   .         
## locationDELTA COUNTY         .         
## locationDENVER COUNTY        .         
## locationDESOTO COUNTY        .         
## locationDIXIE COUNTY         .         
## locationDOLORES COUNTY       .         
## locationDOUGLAS COUNTY       .         
## locationDUKES COUNTY         .         
## locationDUVAL COUNTY         .         
## locationE1                   .         
## locationE2                   .         
## locationE3                   .         
## locationE4                   .         
## locationE5                   .         
## locationE6                   .         
## locationE7                   .         
## locationEAGLE COUNTY         .         
## locationEL PASO COUNTY       .         
## locationELBERT COUNTY        .         
## locationESCAMBIA COUNTY      .         
## locationESSEX COUNTY         .         
## locationF1                   .         
## locationF2                   .         
## locationF3                   .         
## locationF4                   .         
## locationF5                   .         
## locationFLAGLER COUNTY       .         
## locationFRANKLIN COUNTY      .         
## locationFREMONT COUNTY       .         
## locationG1                   .         
## locationG2                   .         
## locationG3                   .         
## locationG4                   .         
## locationG5                   .         
## locationG6                   .         
## locationGADSDEN COUNTY       .         
## locationGARFIELD COUNTY      .         
## locationGILCHRIST COUNTY     .         
## locationGILPIN COUNTY        .         
## locationGLADES COUNTY        .         
## locationGRAND COUNTY         .         
## locationGULF COUNTY          .         
## locationGUNNISON COUNTY      .         
## locationH1                   .         
## locationH2                   .         
## locationH3                   .         
## locationH4                   .         
## locationH5                   .         
## locationH6                   .         
## locationHAMILTON COUNTY      .         
## locationHAMPDEN COUNTY       .         
## locationHAMPSHIRE COUNTY     .         
## locationHARDEE COUNTY        .         
## locationHENDRY COUNTY        .         
## locationHERNANDO COUNTY      .         
## locationHIGHLANDS COUNTY     .         
## locationHILLSBOROUGH COUNTY  .         
## locationHINSDALE COUNTY      .         
## locationHOLMES COUNTY        .         
## locationHUERFANO COUNTY      .         
## locationINDIAN RIVER COUNTY  .         
## locationJACKSON COUNTY       .         
## locationJEFFERSON COUNTY     .         
## locationKIOWA COUNTY         .         
## locationKIT CARSON COUNTY    .         
## locationLA PLATA COUNTY      .         
## locationLAFAYETTE COUNTY     .         
## locationLAKE COUNTY          .         
## locationLARIMER COUNTY       .         
## locationLAS ANIMAS COUNTY    .         
## locationLEE COUNTY           .         
## locationLEON COUNTY          .         
## locationLEVY COUNTY          .         
## locationLIBERTY COUNTY       .         
## locationLINCOLN COUNTY       .         
## locationLOGAN COUNTY         .         
## locationMADISON COUNTY       .         
## locationMANATEE COUNTY       .         
## locationMARION COUNTY        .         
## locationMARTIN COUNTY        .         
## locationMESA COUNTY          .         
## locationMIAMI-DADE COUNTY    .         
## locationMIDDLESEX COUNTY     .         
## locationMINERAL COUNTY       .         
## locationMOFFAT COUNTY        .         
## locationMONROE COUNTY        .         
## locationMONTEZUMA COUNTY     .         
## locationMONTROSE COUNTY      .         
## locationMORGAN COUNTY        .         
## locationNANTUCKET COUNTY     .         
## locationNASSAU COUNTY        .         
## locationNORFOLK COUNTY       .         
## locationOKALOOSA COUNTY      .         
## locationOKEECHOBEE COUNTY    .         
## locationORANGE COUNTY        .         
## locationOSCEOLA COUNTY       .         
## locationOTERO COUNTY         .         
## locationOURAY COUNTY         .         
## locationPALM BEACH COUNTY    .         
## locationPARK COUNTY          .         
## locationPASCO COUNTY         .         
## locationPHILLIPS COUNTY      .         
## locationPINELLAS COUNTY      .         
## locationPITKIN COUNTY        .         
## locationPLYMOUTH COUNTY      .         
## locationPOLK COUNTY          .         
## locationPROWERS COUNTY       .         
## locationPUEBLO COUNTY        .         
## locationPUTNAM COUNTY        .         
## locationRIO BLANCO COUNTY    .         
## locationRIO GRANDE COUNTY    .         
## locationROUTT COUNTY         .         
## locationSAGUACHE COUNTY      .         
## locationSAN JUAN COUNTY      .         
## locationSAN MIGUEL COUNTY    .         
## locationSANTA ROSA COUNTY    .         
## locationSARASOTA COUNTY      .         
## locationSEDGWICK COUNTY      .         
## locationSEMINOLE COUNTY      .         
## locationST. JOHNS COUNTY     .         
## locationST. LUCIE COUNTY     .         
## locationSUFFOLK COUNTY       .         
## locationSUMMIT COUNTY        .         
## locationSUMTER COUNTY        .         
## locationSUWANNEE COUNTY      .         
## locationTAYLOR COUNTY        .         
## locationTELLER COUNTY        .         
## locationUNION COUNTY         .         
## locationVOLUSIA COUNTY       .         
## locationWAKULLA COUNTY       .         
## locationWALTON COUNTY        .         
## locationWASHINGTON COUNTY    .         
## locationWELD COUNTY          .         
## locationWORCESTER COUNTY     .         
## locationYUMA COUNTY          .         
## stateFL                      0.01277906
## stateMA                      .         
## stateNC                      .         
## driver_raceHispanic          .         
## driver_raceWhite             .         
## stops_per_year               0.11601297
## stop_rate                    .         
## search_rate                  .         
## consent_search_rate         40.93482829
## arrest_rate                  .
# k-fold cross-validation of the logistic regression
# consent_search_bin ~ stops_per_year.
# `k` (number of folds) and `class_diag()` are not defined in this chunk;
# presumably they are defined earlier in the script -- verify before running.
set.seed(1234)

# Shuffle the rows, then cut the shuffled row positions into k contiguous
# folds of (nearly) equal size.
data_6 <- open_policing[sample(nrow(open_policing)), ]
folds_6 <- cut(seq_len(nrow(open_policing)), breaks = k, labels = FALSE)

# Collect one row of diagnostics per fold in a preallocated list instead of
# growing a data frame with rbind() on every iteration.
fold_diags <- vector("list", k)
for (i in seq_len(k)) {
  # Fold i is held out for testing; the remaining folds form the training set.
  train_6 <- data_6[folds_6 != i, ]
  test_6 <- data_6[folds_6 == i, ]
  truth_6 <- test_6$consent_search_bin

  fit_8 <- glm(consent_search_bin ~ stops_per_year,
    data = train_6, family = "binomial"
  )
  # Predicted probabilities for the held-out fold.
  probs_6 <- predict(fit_8, newdata = test_6, type = "response")

  fold_diags[[i]] <- class_diag(probs_6, truth_6)
}
diags <- do.call(rbind, fold_diags)

# Average every diagnostic across the k folds.
diags %>% summarize_all(mean)
##         acc sens spec       ppv       auc
## 1 0.6683051    1    0 0.6683051 0.6226937

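Comparing these LASSO results with the out-of-sample results from part 5, the LASSO-selected model had a lower specificity but a greater accuracy, sensitivity, precision, and AUC. This means a higher proportion of correctly classified cases, a higher true positive rate, a greater proportion of predicted consent searches that were actual consent searches (precision), and a better overall ability to separate the two classes, as measured by the AUC. Note, however, that a sensitivity of 1 together with a specificity of 0 means the model classified every held-out observation as positive, so the accuracy and precision here simply equal the proportion of positive cases in the data.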