library(readr)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1 ✔ dplyr 0.8.3
## ✔ tibble 2.1.3 ✔ stringr 1.4.0
## ✔ tidyr 1.0.0 ✔ forcats 0.4.0
## ✔ purrr 0.3.3
## ── Conflicts ──────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidyr)
library(knitr)
library(dplyr)
library(ggplot2)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(plotROC)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 3.0-1
# Download the per-location, per-race summary table from the openpolicing
# repository (one row per location/state/driver_race combination).
combined_data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/5harad/openpolicing/master/results/data_for_figures/combined_data.csv"
)
## Parsed with column specification:
## cols(
## location = col_character(),
## state = col_character(),
## driver_race = col_character(),
## stops_per_year = col_double(),
## stop_rate = col_double(),
## search_rate = col_double(),
## consent_search_rate = col_double(),
## arrest_rate = col_double(),
## citation_rate_speeding_stops = col_double(),
## hit_rate = col_double(),
## inferred_threshold = col_double()
## )
# Build the analysis table.
# - Keep columns 1-8 (location, state, driver_race and the five rate/count
#   measures) and drop every row that has a missing value.
# - Add two encodings of "were there any consent searches?":
#     consent_search_bin: 0 when consent_search_rate is exactly 0, else 1
#     consent_search_cat: "no" / "yes" with the same rule
# The indicators are derived in place with mutate(); the earlier version built
# two temporary tables and merged them back with implicit-key left joins on
# floating-point columns, which is fragile and can duplicate rows if the key
# columns are ever non-unique.
# `dplyr::select` is namespaced because MASS (attached above) masks select().
open_policing <- combined_data %>%
  dplyr::select(1:8) %>%
  na.omit() %>%
  mutate(
    consent_search_bin = case_when(consent_search_rate == 0 ~ 0, TRUE ~ 1),
    consent_search_cat = case_when(consent_search_rate == 0 ~ "no", TRUE ~ "yes")
  )
# One-way MANOVA: do the five stop/search/arrest measures jointly differ by
# driver race? It is fitted on the raw table; rows with missing values are
# dropped when the model frame is built.
manova_data <- manova(
  cbind(stops_per_year, stop_rate, search_rate, consent_search_rate, arrest_rate) ~ driver_race,
  data = combined_data
)
summary(manova_data)
## Df Pillai approx F num Df den Df Pr(>F)
## driver_race 2 0.4135 30.651 10 1176 < 2.2e-16 ***
## Residuals 591
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up univariate ANOVAs, one per response variable of the MANOVA.
manova_data %>% summary.aov()
## Response stops_per_year :
## Df Sum Sq Mean Sq F value Pr(>F)
## driver_race 2 4.2621e+09 2131054561 77.54 < 2.2e-16 ***
## Residuals 591 1.6243e+10 27483191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response stop_rate :
## Df Sum Sq Mean Sq F value Pr(>F)
## driver_race 2 20.55 10.275 5.486 0.004358 **
## Residuals 591 1106.95 1.873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response search_rate :
## Df Sum Sq Mean Sq F value Pr(>F)
## driver_race 2 0.003109 0.00155452 30.773 1.934e-13 ***
## Residuals 591 0.029855 0.00005052
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response consent_search_rate :
## Df Sum Sq Mean Sq F value Pr(>F)
## driver_race 2 0.0003719 1.8597e-04 22.928 2.571e-10 ***
## Residuals 591 0.0047936 8.1110e-06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response arrest_rate :
## Df Sum Sq Mean Sq F value Pr(>F)
## driver_race 2 0.08748 0.043741 65.998 < 2.2e-16 ***
## Residuals 591 0.39169 0.000663
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## 2094 observations deleted due to missingness
# Mean of each of the five measures within every driver_race group.
open_policing %>%
  group_by(driver_race) %>%
  summarize(
    mean(stops_per_year),
    mean(stop_rate),
    mean(search_rate),
    mean(consent_search_rate),
    mean(arrest_rate)
  )
## # A tibble: 3 x 6
## driver_race `mean(stops_per… `mean(stop_rate… `mean(search_ra…
## <chr> <dbl> <dbl> <dbl>
## 1 Black 1964. 0.606 0.00894
## 2 Hispanic 1594. 0.236 0.00971
## 3 White 7444. 0.189 0.00453
## # … with 2 more variables: `mean(consent_search_rate)` <dbl>,
## # `mean(arrest_rate)` <dbl>
# Post hoc pairwise t tests (pooled SD) of stops_per_year between races.
# The argument is spelled out in full rather than relying on partial matching
# of `p.adj`. P-values are deliberately unadjusted, so they must be judged
# against a multiplicity-corrected significance threshold.
pairwise.t.test(open_policing$stops_per_year, open_policing$driver_race,
  p.adjust.method = "none"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: open_policing$stops_per_year and open_policing$driver_race
##
## Black Hispanic
## Hispanic 0.48 -
## White <2e-16 <2e-16
##
## P value adjustment method: none
# Post hoc pairwise t tests (pooled SD) of stop_rate between races.
# The argument is spelled out in full rather than relying on partial matching
# of `p.adj`. P-values are deliberately unadjusted, so they must be judged
# against a multiplicity-corrected significance threshold.
pairwise.t.test(open_policing$stop_rate, open_policing$driver_race,
  p.adjust.method = "none"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: open_policing$stop_rate and open_policing$driver_race
##
## Black Hispanic
## Hispanic 0.0075 -
## White 0.0026 0.7310
##
## P value adjustment method: none
# Post hoc pairwise t tests (pooled SD) of search_rate between races.
# The argument is spelled out in full rather than relying on partial matching
# of `p.adj`. P-values are deliberately unadjusted, so they must be judged
# against a multiplicity-corrected significance threshold.
pairwise.t.test(open_policing$search_rate, open_policing$driver_race,
  p.adjust.method = "none"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: open_policing$search_rate and open_policing$driver_race
##
## Black Hispanic
## Hispanic 0.28 -
## White 1.2e-09 1.2e-12
##
## P value adjustment method: none
# Post hoc pairwise t tests (pooled SD) of consent_search_rate between races.
# The argument is spelled out in full rather than relying on partial matching
# of `p.adj`. P-values are deliberately unadjusted, so they must be judged
# against a multiplicity-corrected significance threshold.
pairwise.t.test(open_policing$consent_search_rate, open_policing$driver_race,
  p.adjust.method = "none"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: open_policing$consent_search_rate and open_policing$driver_race
##
## Black Hispanic
## Hispanic 0.17 -
## White 5.9e-07 2.7e-10
##
## P value adjustment method: none
# Post hoc pairwise t tests (pooled SD) of arrest_rate between races.
# The argument is spelled out in full rather than relying on partial matching
# of `p.adj`. P-values are deliberately unadjusted, so they must be judged
# against a multiplicity-corrected significance threshold.
pairwise.t.test(open_policing$arrest_rate, open_policing$driver_race,
  p.adjust.method = "none"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: open_policing$arrest_rate and open_policing$driver_race
##
## Black Hispanic
## Hispanic 1.7e-11 -
## White 7.3e-06 < 2e-16
##
## P value adjustment method: none
# Observed statistic for the randomization test: mean arrest_rate of the
# consent_search_bin == 1 group minus that of the == 0 group.
open_policing %>%
  group_by(consent_search_bin) %>%
  summarize(m = mean(arrest_rate)) %>%
  summarize(diff(m))
## # A tibble: 1 x 1
## `diff(m)`
## <dbl>
## 1 0.00509
# Randomization test: does mean arrest_rate differ between rows with and
# without consent searches (consent_search_bin 1 vs 0)?
# Seeded so the p-value and histogram are reproducible between runs.
# (An earlier, unseeded run of the one-tail-doubled version reported 0.0324.)
set.seed(1234)

# Observed statistic, computed from the data instead of being pasted in as a
# literal: mean(arrest_rate | bin == 1) - mean(arrest_rate | bin == 0).
group_means <- tapply(
  open_policing$arrest_rate, open_policing$consent_search_bin, mean
)
obs_diff <- unname(group_means["1"] - group_means["0"])

# Null distribution: shuffle arrest_rate against the fixed group labels 5000
# times and recompute the difference in means each time. replicate() returns
# the whole vector at once, so nothing is grown inside a loop.
rand_dist <- replicate(5000, {
  shuffled <- sample(open_policing$arrest_rate)
  mean(shuffled[open_policing$consent_search_bin == 1]) -
    mean(shuffled[open_policing$consent_search_bin == 0])
})

# Two-sided p-value: share of shuffles at least as extreme as the observed
# difference in absolute value (doubling one tail is only valid when the null
# distribution is symmetric).
mean(abs(rand_dist) >= abs(obs_diff)) # pvalue
{
  hist(rand_dist, main = "", ylab = "")
  abline(v = obs_diff, col = "red")
}
# Mean-centre stop_rate so the main effects of the interaction model are
# evaluated at the average stop rate.
open_policing <- open_policing %>%
  mutate(stop_rate_c = stop_rate - mean(stop_rate))

# Linear model of arrest_rate on consent-search status, centred stop rate and
# their interaction.
fit_3 <- lm(arrest_rate ~ consent_search_cat * stop_rate_c, data = open_policing)
summary(fit_3)
##
## Call:
## lm(formula = arrest_rate ~ consent_search_cat * stop_rate_c,
## data = open_policing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033275 -0.017784 -0.008089 0.008261 0.171565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0280696 0.0020211 13.888 <2e-16 ***
## consent_search_catyes 0.0046618 0.0024761 1.883 0.0602 .
## stop_rate_c -0.0011040 0.0009124 -1.210 0.2268
## consent_search_catyes:stop_rate_c -0.0024267 0.0024254 -1.001 0.3175
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0283 on 590 degrees of freedom
## Multiple R-squared: 0.0137, Adjusted R-squared: 0.008681
## F-statistic: 2.731 on 3 and 590 DF, p-value: 0.04313
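# Interpretation of fit_3 (stop_rate_c is the mean-centered stop_rate):
# - Intercept: for rows with no consent searches and an average stop rate, the predicted arrest_rate is 0.0280696.
# - stop_rate_c: among rows with no consent searches, each one-unit increase in stop_rate changes the predicted arrest_rate by -0.0011040.
# - consent_search_catyes: at the average stop rate, rows with consent searches have a predicted arrest_rate that is 0.0046618 higher than rows without consent searches.
# - Interaction: the slope of stop_rate on arrest_rate is 0.0024267 lower for rows with consent searches than for rows with no consent searches.
# None of these three effects is significant at the 0.05 level (p = 0.2268, 0.0602 and 0.3175, respectively).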
# Scatter plot of arrest_rate against stop_rate with one fitted regression
# line per consent-search group (visualises the interaction in fit_3).
# TRUE/FALSE are written out because T and F are ordinary, reassignable
# variables.
ggplot(open_policing, aes(x = stop_rate, y = arrest_rate, group = consent_search_cat)) +
  geom_point(aes(color = consent_search_cat)) +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, aes(color = consent_search_cat)) +
  theme(legend.position = c(.9, .19)) +
  xlab("")
# Assumption checks for fit_3.
resids <- residuals(fit_3)
fitvals <- fitted(fit_3)

# Residuals vs fitted values: look for non-linearity and unequal spread.
ggplot() +
  geom_point(aes(fitvals, resids)) +
  geom_hline(yintercept = 0, color = "red")

# Histogram of residuals: rough check of normality.
ggplot() +
  geom_histogram(aes(resids), bins = 20)

# Raw relationship, coloured by consent-search group.
ggplot(open_policing, aes(stop_rate, arrest_rate, color = consent_search_cat)) +
  geom_point()

# Coefficient estimates with heteroskedasticity-robust standard errors.
coeftest(fit_3, vcov = vcovHC(fit_3))[, 1:2]
## Estimate Std. Error
## (Intercept) 0.028069625 0.002587965
## consent_search_catyes 0.004661783 0.002842918
## stop_rate_c -0.001103971 0.002490565
## consent_search_catyes:stop_rate_c -0.002426686 0.002696296
# Same estimates with the classical (non-robust) standard errors.
coeftest(fit_3)[, c("Estimate", "Std. Error")]
## Estimate Std. Error
## (Intercept) 0.028069625 0.0020211369
## consent_search_catyes 0.004661783 0.0024760553
## stop_rate_c -0.001103971 0.0009124088
## consent_search_catyes:stop_rate_c -0.002426686 0.0024254340
# Proportion of variance in arrest_rate explained by fit_3. The element name
# is written in full; `$r.sq` only worked through partial matching.
summary(fit_3)$r.squared
## [1] 0.01369648
# Main-effects-only version of the model (interaction term dropped).
fit_4 <- lm(
  arrest_rate ~ consent_search_cat + stop_rate_c,
  data = open_policing
)
summary(fit_4)
##
## Call:
## lm(formula = arrest_rate ~ consent_search_cat + stop_rate_c,
## data = open_policing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033110 -0.017775 -0.008275 0.008330 0.171400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0281213 0.0020205 13.918 <2e-16 ***
## consent_search_catyes 0.0047656 0.0024739 1.926 0.0545 .
## stop_rate_c -0.0014474 0.0008454 -1.712 0.0874 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0283 on 591 degrees of freedom
## Multiple R-squared: 0.01202, Adjusted R-squared: 0.00868
## F-statistic: 3.596 on 2 and 591 DF, p-value: 0.02803
# Classical standard errors of the interaction model again.
# NOTE(review): this repeats the earlier fit_3 table right after fit_4 is
# fitted -- presumably as the baseline for the bootstrap SEs; confirm that
# fit_4 was not intended here.
coeftest(fit_3)[, c("Estimate", "Std. Error")]
## Estimate Std. Error
## (Intercept) 0.028069625 0.0020211369
## consent_search_catyes 0.004661783 0.0024760553
## stop_rate_c -0.001103971 0.0009124088
## consent_search_catyes:stop_rate_c -0.002426686 0.0024254340
# Bootstrap standard errors for the interaction model: resample rows with
# replacement 5000 times, refit, and collect the coefficient vector each time
# (one column per replicate).
samp_distn <- replicate(5000, {
  resampled_rows <- sample(nrow(open_policing), replace = TRUE)
  boot_fit <- lm(arrest_rate ~ consent_search_cat * stop_rate_c,
    data = open_policing[resampled_rows, ]
  )
  coef(boot_fit)
})

# Standard deviation of each coefficient across replicates = bootstrap SE.
samp_distn %>%
  t() %>%
  as.data.frame() %>%
  summarize_all(sd)
## (Intercept) consent_search_catyes stop_rate_c
## 1 0.002587852 0.00284409 0.004064892
## consent_search_catyes:stop_rate_c
## 1 0.004306461
# Logistic regression: probability that a row has any consent searches,
# predicted from stops_per_year and stop_rate.
fit_5 <- glm(
  consent_search_bin ~ stops_per_year + stop_rate,
  data = open_policing,
  family = binomial(link = "logit")
)
coeftest(fit_5)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.8475e-01 1.1188e-01 5.2266 1.726e-07 ***
## stops_per_year 4.5829e-05 1.9649e-05 2.3324 0.01968 *
## stop_rate -1.1041e-01 8.4563e-02 -1.3057 0.19167
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficients on the log-odds scale, rounded to 3 decimals.
coef(fit_5) %>%
  round(3) %>%
  data.frame()
## .
## (Intercept) 0.585
## stops_per_year 0.000
## stop_rate -0.110
# Exponentiated coefficients (odds ratios), rounded to 3 decimals.
exp(coef(fit_5)) %>%
  round(3) %>%
  data.frame()
## .
## (Intercept) 1.795
## stops_per_year 1.000
## stop_rate 0.895
# In-sample predicted probabilities from fit_5, classified at a 0.5 cutoff.
prob <- predict(fit_5, type = "response")
pred <- ifelse(prob > .5, 1, 0)

# Confusion matrix (rows = truth, columns = prediction) with marginal totals.
table(truth = open_policing$consent_search_bin, prediction = pred) %>%
  addmargins()
## prediction
## truth 0 1 Sum
## 0 2 195 197
## 1 1 396 397
## Sum 3 591 594
# Metrics read off the confusion matrix above
# (TN = 2, FP = 195, FN = 1, TP = 396; 197 actual negatives, 397 actual
# positives, 591 predicted positives, 594 rows in total).

# Accuracy: (TP + TN) / N
(396+2)/594
## [1] 0.6700337
# Sensitivity / recall (TPR): TP / actual positives
396/397
## [1] 0.9974811
# Specificity (TNR): TN / actual negatives. The denominator is the truth == 0
# row total (197), not the predicted-0 column total (3).
2/197
## [1] 0.01015228
# Precision (PPV): TP / predicted positives
396/591
## [1] 0.6700508
# Collapse the two predictors into their first principal component and fit a
# one-predictor logistic model on it.
# NOTE(review): princomp() is run on the unscaled columns (covariance matrix);
# stops_per_year has a far larger variance than stop_rate, so the first
# component is presumably dominated by it -- confirm whether cor = TRUE was
# intended.
pca1 <- princomp(open_policing[c("stop_rate", "stops_per_year")])
open_policing[["predictor"]] <- pca1$scores[, 1]
fit_6 <- glm(consent_search_bin ~ predictor, data = open_policing, family = "binomial")

# `prob` holds fit_6's predicted probabilities; `logit` holds fit_5's linear
# predictor (log-odds). The two columns come from different models.
open_policing[["prob"]] <- predict(fit_6, type = "response")
open_policing[["logit"]] <- predict(fit_5)

# Density of fit_5's log-odds by true class; the dashed line at 0 corresponds
# to a predicted probability of 0.5.
ggplot(open_policing, aes(logit, fill = consent_search_cat)) +
  geom_density(alpha = 0.3) +
  geom_vline(xintercept = 0, lty = 2)
# Sensitivity (true-positive rate) at probability cutoff `p`.
#   p    : cutoff; a row is called positive when its `prob` is strictly > p.
#   data : data frame with a `prob` column and the truth column named by `y`.
#   y    : name (string) of the 0/1 truth column.
# Returns the share of truth == 1 rows whose prob exceeds p.
# The earlier version ignored `data` and `y` and always read the global
# open_policing table.
sens <- function(p, data = open_policing, y = "consent_search_bin") {
  mean(data$prob[data[[y]] == 1] > p)
}

# Specificity (true-negative rate) at probability cutoff `p`.
# Same arguments as sens(); returns the share of truth == 0 rows whose prob
# is strictly below p.
spec <- function(p, data = open_policing, y = "consent_search_bin") {
  mean(data$prob[data[[y]] == 0] < p)
}
# Hand-built ROC curve: evaluate sens() and spec() on a grid of cutoffs from
# 0 to 1 in steps of 0.01. vapply() is used instead of sapply() so the result
# is guaranteed to be a numeric vector with one value per cutoff.
cutoffs <- seq(0, 1, .01)
sensitivity <- vapply(cutoffs, sens, numeric(1), data = open_policing)
specificity <- vapply(cutoffs, spec, numeric(1), data = open_policing)

ROC1 <- data.frame(sensitivity, specificity, cutoff = cutoffs)
ROC1$TPR <- sensitivity
ROC1$FPR <- 1 - specificity

# ROC path with the dashed diagonal of a no-skill classifier for reference.
ROC1 %>%
  ggplot(aes(FPR, TPR)) +
  geom_path(size = 1.5) +
  geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), lty = 2) +
  scale_x_continuous(limits = c(0, 1))
# Classification diagnostics for predicted probabilities against binary truth.
#
# Args:
#   probs: numeric vector of predicted probabilities of the positive class.
#   truth: true labels, same length as `probs`: numeric 0/1, logical, or a
#          two-level factor / character vector (second level = positive).
# Returns:
#   One-row data.frame with accuracy, sensitivity, specificity and precision
#   (ppv) at a 0.5 cutoff, plus the area under the ROC curve (auc).
#   A rate whose denominator is zero (e.g. specificity when `truth` contains
#   no negatives) is NaN instead of raising an error.
class_diag <- function(probs, truth) {
  stopifnot(length(probs) == length(truth))

  # Normalise truth to numeric 0/1 up front so that the confusion table and
  # the ROC computation share one encoding. Character labels go through
  # as.factor() first; as.numeric() on raw strings would give NA.
  if (!is.numeric(truth) && !is.logical(truth)) {
    truth <- as.numeric(as.factor(truth)) - 1
  }
  truth <- as.numeric(truth)

  # Both dimensions get fixed levels so the table is always 2 x 2, even when
  # a class is never predicted or never observed (small CV folds).
  tab <- table(
    factor(probs > .5, levels = c("FALSE", "TRUE")),
    factor(truth, levels = c(0, 1))
  )
  acc <- sum(diag(tab)) / sum(tab)
  sens <- tab[2, 2] / colSums(tab)[2]
  spec <- tab[1, 1] / colSums(tab)[1]
  ppv <- tab[2, 2] / rowSums(tab)[2]

  # ROC curve: sweep the cutoff down through the sorted probabilities.
  ord <- order(probs, decreasing = TRUE)
  probs <- probs[ord]
  truth <- truth[ord]
  TPR <- cumsum(truth) / max(1, sum(truth))
  FPR <- cumsum(!truth) / max(1, sum(!truth))

  # Tied probabilities form a single ROC point: keep only the last of a run.
  dup <- c(probs[-1] >= probs[-length(probs)], FALSE)
  TPR <- c(0, TPR[!dup], 1)
  FPR <- c(0, FPR[!dup], 1)

  # Trapezoidal area under the (FPR, TPR) curve.
  n <- length(TPR)
  auc <- sum(((TPR[-1] + TPR[-n]) / 2) * (FPR[-1] - FPR[-n]))

  data.frame(acc, sens, spec, ppv, auc)
}
# 10-fold cross-validation of the logistic model
#   consent_search_bin ~ stops_per_year + stop_rate
set.seed(1234)
k <- 10

# Shuffle the rows once, then cut the row positions into k contiguous folds
# of (near-)equal size.
data_5 <- open_policing[sample(nrow(open_policing)), ]
folds <- cut(seq_len(nrow(open_policing)), breaks = k, labels = FALSE)

# One diagnostics row per fold, stored in a preallocated list rather than
# grown with rbind() inside the loop.
fold_diags <- vector("list", k)
for (i in seq_len(k)) {
  train_5 <- data_5[folds != i, ]
  test_5 <- data_5[folds == i, ]
  truth_5 <- test_5$consent_search_bin

  # Fit on the k - 1 training folds, score the held-out fold.
  fit_7 <- glm(consent_search_bin ~ stops_per_year + stop_rate, data = train_5, family = "binomial")
  probs_5 <- predict(fit_7, newdata = test_5, type = "response")

  fold_diags[[i]] <- class_diag(probs_5, truth_5)
}
diags <- do.call(rbind, fold_diags)

# Average out-of-sample diagnostics across the folds.
colMeans(diags)
## acc sens spec ppv auc
## 0.66827684 0.99478320 0.00819398 0.66911942 0.59093790
# Encode the categorical columns as factors so model.matrix() expands them
# into dummy variables.
open_policing$location <- factor(open_policing$location)
open_policing$state <- factor(open_policing$state)
open_policing$driver_race <- factor(open_policing$driver_race)
open_policing$consent_search_cat <- factor(open_policing$consent_search_cat)

# Full model whose design matrix feeds the lasso.
# consent_search_rate is deliberately NOT a predictor: consent_search_bin is
# defined as (consent_search_rate != 0), so including it leaks the response
# into the predictors and separates the classes perfectly.
# `-1` removes the intercept column from the design matrix.
fit_lasso <- glm(
  consent_search_bin ~ -1 + location + state + driver_race + stops_per_year +
    stop_rate + search_rate + arrest_rate,
  data = open_policing,
  family = "binomial"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Preview the first rows of the dummy-coded design matrix.
head(model.matrix(fit_lasso))
## locationA1 locationA2 locationA3 locationA4 locationA5 locationA6 locationA7
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## locationA8 locationADAMS COUNTY locationALACHUA COUNTY locationALAMOSA COUNTY
## 1 0 1 0 0
## 2 0 1 0 0
## 3 0 1 0 0
## 4 0 0 0 1
## 5 0 0 0 1
## 6 0 0 0 1
## locationARAPAHOE COUNTY locationARCHULETA COUNTY locationB2 locationB3
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationB4 locationB5 locationB6 locationB7 locationB8 locationBACA COUNTY
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## locationBAKER COUNTY locationBARNSTABLE COUNTY locationBAY COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationBENT COUNTY locationBERKSHIRE COUNTY locationBOULDER COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationBRADFORD COUNTY locationBREVARD COUNTY locationBRISTOL COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationBROOMFIELD COUNTY locationBROWARD COUNTY locationC1 locationC2
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationC3 locationC4 locationC5 locationC6 locationC7 locationC8
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## locationCALHOUN COUNTY locationCHAFFEE COUNTY locationCHARLOTTE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationCHEYENNE COUNTY locationCITRUS COUNTY locationCLAY COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationCLEAR CREEK COUNTY locationCOLLIER COUNTY locationCOLUMBIA COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationCONEJOS COUNTY locationCOSTILLA COUNTY locationCROWLEY COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationCUSTER COUNTY locationD1 locationD2 locationD3 locationD4 locationD5
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## locationD6 locationD7 locationDELTA COUNTY locationDENVER COUNTY
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationDESOTO COUNTY locationDIXIE COUNTY locationDOLORES COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationDOUGLAS COUNTY locationDUKES COUNTY locationDUVAL COUNTY locationE1
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationE2 locationE3 locationE4 locationE5 locationE6 locationE7
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## locationEAGLE COUNTY locationEL PASO COUNTY locationELBERT COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationESCAMBIA COUNTY locationESSEX COUNTY locationF1 locationF2 locationF3
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## locationF4 locationF5 locationFLAGLER COUNTY locationFRANKLIN COUNTY
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationFREMONT COUNTY locationG1 locationG2 locationG3 locationG4 locationG5
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## locationG6 locationGADSDEN COUNTY locationGARFIELD COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationGILCHRIST COUNTY locationGILPIN COUNTY locationGLADES COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationGRAND COUNTY locationGULF COUNTY locationGUNNISON COUNTY locationH1
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## locationH2 locationH3 locationH4 locationH5 locationH6
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## locationHAMILTON COUNTY locationHAMPDEN COUNTY locationHAMPSHIRE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationHARDEE COUNTY locationHENDRY COUNTY locationHERNANDO COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationHIGHLANDS COUNTY locationHILLSBOROUGH COUNTY locationHINSDALE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationHOLMES COUNTY locationHUERFANO COUNTY locationINDIAN RIVER COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationJACKSON COUNTY locationJEFFERSON COUNTY locationKIOWA COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationKIT CARSON COUNTY locationLA PLATA COUNTY locationLAFAYETTE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationLAKE COUNTY locationLARIMER COUNTY locationLAS ANIMAS COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationLEE COUNTY locationLEON COUNTY locationLEVY COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationLIBERTY COUNTY locationLINCOLN COUNTY locationLOGAN COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationMADISON COUNTY locationMANATEE COUNTY locationMARION COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationMARTIN COUNTY locationMESA COUNTY locationMIAMI-DADE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationMIDDLESEX COUNTY locationMINERAL COUNTY locationMOFFAT COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationMONROE COUNTY locationMONTEZUMA COUNTY locationMONTROSE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationMORGAN COUNTY locationNANTUCKET COUNTY locationNASSAU COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationNORFOLK COUNTY locationOKALOOSA COUNTY locationOKEECHOBEE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationORANGE COUNTY locationOSCEOLA COUNTY locationOTERO COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationOURAY COUNTY locationPALM BEACH COUNTY locationPARK COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationPASCO COUNTY locationPHILLIPS COUNTY locationPINELLAS COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationPITKIN COUNTY locationPLYMOUTH COUNTY locationPOLK COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationPROWERS COUNTY locationPUEBLO COUNTY locationPUTNAM COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationRIO BLANCO COUNTY locationRIO GRANDE COUNTY locationROUTT COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationSAGUACHE COUNTY locationSAN JUAN COUNTY locationSAN MIGUEL COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationSANTA ROSA COUNTY locationSARASOTA COUNTY locationSEDGWICK COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationSEMINOLE COUNTY locationST. JOHNS COUNTY locationST. LUCIE COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationSUFFOLK COUNTY locationSUMMIT COUNTY locationSUMTER COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationSUWANNEE COUNTY locationTAYLOR COUNTY locationTELLER COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationUNION COUNTY locationVOLUSIA COUNTY locationWAKULLA COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationWALTON COUNTY locationWASHINGTON COUNTY locationWELD COUNTY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## locationWORCESTER COUNTY locationYUMA COUNTY stateFL stateMA stateNC
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## driver_raceHispanic driver_raceWhite stops_per_year stop_rate search_rate
## 1 0 0 791 0.077 0.005
## 2 1 0 5913 0.048 0.003
## 3 0 1 13800 0.069 0.003
## 4 0 0 80 0.506 0.017
## 5 1 0 1598 0.296 0.009
## 6 0 1 2951 0.447 0.004
## consent_search_rate arrest_rate
## 1 0.002 0.057
## 2 0.001 0.071
## 3 0.001 0.032
## 4 0.010 0.093
## 5 0.003 0.058
## 6 0.001 0.034
# Lasso-penalised logistic regression for variable selection.
set.seed(1234)

# Standardised design matrix and the 0/1 response as a one-column matrix.
x <- scale(model.matrix(fit_lasso))
y <- as.matrix(open_policing$consent_search_bin)

# Pick the penalty by cross-validation, then refit at lambda.1se.
cv <- cv.glmnet(x, y, family = "binomial")
lasso <- glmnet(x, y, family = "binomial", lambda = cv$lambda.1se)

# Coefficients of the cross-validated fit; "." marks a coefficient shrunk to
# exactly zero.
coef(cv)
## 205 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 19.83460791
## locationA1 .
## locationA2 .
## locationA3 .
## locationA4 .
## locationA5 .
## locationA6 .
## locationA7 .
## locationA8 .
## locationADAMS COUNTY .
## locationALACHUA COUNTY .
## locationALAMOSA COUNTY .
## locationARAPAHOE COUNTY .
## locationARCHULETA COUNTY .
## locationB2 .
## locationB3 .
## locationB4 .
## locationB5 .
## locationB6 .
## locationB7 .
## locationB8 .
## locationBACA COUNTY .
## locationBAKER COUNTY .
## locationBARNSTABLE COUNTY .
## locationBAY COUNTY .
## locationBENT COUNTY .
## locationBERKSHIRE COUNTY .
## locationBOULDER COUNTY .
## locationBRADFORD COUNTY .
## locationBREVARD COUNTY .
## locationBRISTOL COUNTY .
## locationBROOMFIELD COUNTY .
## locationBROWARD COUNTY .
## locationC1 .
## locationC2 .
## locationC3 .
## locationC4 .
## locationC5 .
## locationC6 .
## locationC7 .
## locationC8 .
## locationCALHOUN COUNTY .
## locationCHAFFEE COUNTY .
## locationCHARLOTTE COUNTY .
## locationCHEYENNE COUNTY .
## locationCITRUS COUNTY .
## locationCLAY COUNTY .
## locationCLEAR CREEK COUNTY .
## locationCOLLIER COUNTY .
## locationCOLUMBIA COUNTY .
## locationCONEJOS COUNTY .
## locationCOSTILLA COUNTY .
## locationCROWLEY COUNTY .
## locationCUSTER COUNTY .
## locationD1 .
## locationD2 .
## locationD3 .
## locationD4 .
## locationD5 .
## locationD6 .
## locationD7 .
## locationDELTA COUNTY .
## locationDENVER COUNTY .
## locationDESOTO COUNTY .
## locationDIXIE COUNTY .
## locationDOLORES COUNTY .
## locationDOUGLAS COUNTY .
## locationDUKES COUNTY .
## locationDUVAL COUNTY .
## locationE1 .
## locationE2 .
## locationE3 .
## locationE4 .
## locationE5 .
## locationE6 .
## locationE7 .
## locationEAGLE COUNTY .
## locationEL PASO COUNTY .
## locationELBERT COUNTY .
## locationESCAMBIA COUNTY .
## locationESSEX COUNTY .
## locationF1 .
## locationF2 .
## locationF3 .
## locationF4 .
## locationF5 .
## locationFLAGLER COUNTY .
## locationFRANKLIN COUNTY .
## locationFREMONT COUNTY .
## locationG1 .
## locationG2 .
## locationG3 .
## locationG4 .
## locationG5 .
## locationG6 .
## locationGADSDEN COUNTY .
## locationGARFIELD COUNTY .
## locationGILCHRIST COUNTY .
## locationGILPIN COUNTY .
## locationGLADES COUNTY .
## locationGRAND COUNTY .
## locationGULF COUNTY .
## locationGUNNISON COUNTY .
## locationH1 .
## locationH2 .
## locationH3 .
## locationH4 .
## locationH5 .
## locationH6 .
## locationHAMILTON COUNTY .
## locationHAMPDEN COUNTY .
## locationHAMPSHIRE COUNTY .
## locationHARDEE COUNTY .
## locationHENDRY COUNTY .
## locationHERNANDO COUNTY .
## locationHIGHLANDS COUNTY .
## locationHILLSBOROUGH COUNTY .
## locationHINSDALE COUNTY .
## locationHOLMES COUNTY .
## locationHUERFANO COUNTY .
## locationINDIAN RIVER COUNTY .
## locationJACKSON COUNTY .
## locationJEFFERSON COUNTY .
## locationKIOWA COUNTY .
## locationKIT CARSON COUNTY .
## locationLA PLATA COUNTY .
## locationLAFAYETTE COUNTY .
## locationLAKE COUNTY .
## locationLARIMER COUNTY .
## locationLAS ANIMAS COUNTY .
## locationLEE COUNTY .
## locationLEON COUNTY .
## locationLEVY COUNTY .
## locationLIBERTY COUNTY .
## locationLINCOLN COUNTY .
## locationLOGAN COUNTY .
## locationMADISON COUNTY .
## locationMANATEE COUNTY .
## locationMARION COUNTY .
## locationMARTIN COUNTY .
## locationMESA COUNTY .
## locationMIAMI-DADE COUNTY .
## locationMIDDLESEX COUNTY .
## locationMINERAL COUNTY .
## locationMOFFAT COUNTY .
## locationMONROE COUNTY .
## locationMONTEZUMA COUNTY .
## locationMONTROSE COUNTY .
## locationMORGAN COUNTY .
## locationNANTUCKET COUNTY .
## locationNASSAU COUNTY .
## locationNORFOLK COUNTY .
## locationOKALOOSA COUNTY .
## locationOKEECHOBEE COUNTY .
## locationORANGE COUNTY .
## locationOSCEOLA COUNTY .
## locationOTERO COUNTY .
## locationOURAY COUNTY .
## locationPALM BEACH COUNTY .
## locationPARK COUNTY .
## locationPASCO COUNTY .
## locationPHILLIPS COUNTY .
## locationPINELLAS COUNTY .
## locationPITKIN COUNTY .
## locationPLYMOUTH COUNTY .
## locationPOLK COUNTY .
## locationPROWERS COUNTY .
## locationPUEBLO COUNTY .
## locationPUTNAM COUNTY .
## locationRIO BLANCO COUNTY .
## locationRIO GRANDE COUNTY .
## locationROUTT COUNTY .
## locationSAGUACHE COUNTY .
## locationSAN JUAN COUNTY .
## locationSAN MIGUEL COUNTY .
## locationSANTA ROSA COUNTY .
## locationSARASOTA COUNTY .
## locationSEDGWICK COUNTY .
## locationSEMINOLE COUNTY .
## locationST. JOHNS COUNTY .
## locationST. LUCIE COUNTY .
## locationSUFFOLK COUNTY .
## locationSUMMIT COUNTY .
## locationSUMTER COUNTY .
## locationSUWANNEE COUNTY .
## locationTAYLOR COUNTY .
## locationTELLER COUNTY .
## locationUNION COUNTY .
## locationVOLUSIA COUNTY .
## locationWAKULLA COUNTY .
## locationWALTON COUNTY .
## locationWASHINGTON COUNTY .
## locationWELD COUNTY .
## locationWORCESTER COUNTY .
## locationYUMA COUNTY .
## stateFL 0.01277906
## stateMA .
## stateNC .
## driver_raceHispanic .
## driver_raceWhite .
## stops_per_year 0.11601297
## stop_rate .
## search_rate .
## consent_search_rate 40.93482829
## arrest_rate .
# k-fold cross-validation of the reduced logistic model
#   consent_search_bin ~ stops_per_year
# `k` is the fold count defined for the earlier cross-validation.
set.seed(1234)

# Shuffle the rows once, then cut the row positions into k contiguous folds.
data_6 <- open_policing[sample(nrow(open_policing)), ]
folds_6 <- cut(seq_len(nrow(open_policing)), breaks = k, labels = FALSE)

# One diagnostics row per fold, stored in a preallocated list rather than
# grown with rbind() inside the loop. The unused hard-classification vector
# from the earlier version is gone; class_diag() applies the 0.5 cutoff.
fold_diags_6 <- vector("list", k)
for (i in seq_len(k)) {
  train_6 <- data_6[folds_6 != i, ]
  test_6 <- data_6[folds_6 == i, ]
  truth_6 <- test_6$consent_search_bin

  # Fit on the k - 1 training folds, score the held-out fold.
  fit_8 <- glm(consent_search_bin ~ stops_per_year, data = train_6, family = "binomial")
  probs_6 <- predict(fit_8, newdata = test_6, type = "response")

  fold_diags_6[[i]] <- class_diag(probs_6, truth_6)
}
diags <- do.call(rbind, fold_diags_6)

# Average out-of-sample diagnostics across the folds.
diags %>% summarize_all(mean)
## acc sens spec ppv auc
## 1 0.6683051 1 0 0.6683051 0.6226937