## Warning: package 'DiagrammeR' was built under R version 4.0.1
## Warning: package 'caret' was built under R version 4.0.1
## 
## Call:
## glm(formula = repair_try ~ age + gender + edu + device_count_5_yr + 
##     miss_dev + dump_reason_new + dump_reason_old + dump_reason_break + 
##     dump_reason_theft + dump_reason_slow + dump_reason_lag + 
##     slt_lack_tu + slt_lack_lang + slt_lack_notint + slt_lack_fear + 
##     slt_lack_parts + slt_lack_repairer + rprd_usage_chlng_No + 
##     rprd_usage_chlng_reluc + rprd_usage_chlng_dur + rprd_usage_chlng_fault + 
##     rpr_missing_trait_behave + rpr_missing_trait_ineff + rpr_missing_trait_harm + 
##     rpr_missing_trait_hard + rpr_missing_trait_wage + rpr_missing_trait_trust + 
##     rpr_missing_trait_gender + bad_rep_exp + dev_tknto_rec_Y + 
##     dev_rec_chlng_fair_price + dev_rec_chlng_usable + dev_rec_chlng_datasec + 
##     dev_rec_chlng_env_poll + dev_rec_chlng_hard_find + will_dev_rec + 
##     did_with_data_Y + did_with_device_econ, family = binomial, 
##     data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5975  -0.8572   0.4249   0.7828   1.9465  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -3.81453  176.55050  -0.022 0.982762    
## age1                       3.44412  176.55067   0.020 0.984436    
## age2                       2.41671  176.54929   0.014 0.989078    
## age3                       3.32647  176.54948   0.019 0.984967    
## age4                       3.15159  176.55392   0.018 0.985758    
## genderWoman               -0.76601    0.33177  -2.309 0.020952 *  
## eduBachelors              -0.45058    0.32919  -1.369 0.171072    
## eduMasters                -1.07114    0.55062  -1.945 0.051733 .  
## device_count_5_yr          0.11875    0.06172   1.924 0.054337 .  
## miss_dev1                  0.27342    0.29558   0.925 0.354952    
## dump_reason_new            0.07219    0.37264   0.194 0.846401    
## dump_reason_old           -0.42721    0.36414  -1.173 0.240712    
## dump_reason_break         -0.57940    0.34560  -1.676 0.093644 .  
## dump_reason_theft         -0.04497    0.55333  -0.081 0.935222    
## dump_reason_slow          -0.25820    0.47821  -0.540 0.589242    
## dump_reason_lag            0.22845    0.34634   0.660 0.509502    
## slt_lack_tu                0.85707    0.33681   2.545 0.010937 *  
## slt_lack_lang              0.74359    0.39371   1.889 0.058936 .  
## slt_lack_notint           -0.31912    0.35723  -0.893 0.371688    
## slt_lack_fear             -0.32464    0.30748  -1.056 0.291053    
## slt_lack_parts             0.70065    0.30417   2.304 0.021251 *  
## slt_lack_repairer          0.72305    0.33431   2.163 0.030556 *  
## rprd_usage_chlng_No        0.09627    0.47985   0.201 0.840985    
## rprd_usage_chlng_reluc    -0.24016    0.34020  -0.706 0.480238    
## rprd_usage_chlng_dur       0.19056    0.36570   0.521 0.602310    
## rprd_usage_chlng_fault    -0.70492    0.36279  -1.943 0.052012 .  
## rpr_missing_trait_behave  -0.07719    0.40245  -0.192 0.847906    
## rpr_missing_trait_ineff   -0.25274    0.30859  -0.819 0.412776    
## rpr_missing_trait_harm    -0.19489    0.30877  -0.631 0.527936    
## rpr_missing_trait_hard     0.41141    0.31345   1.313 0.189349    
## rpr_missing_trait_wage     0.16748    0.29675   0.564 0.572492    
## rpr_missing_trait_trust    0.65292    0.31573   2.068 0.038646 *  
## rpr_missing_trait_gender   0.80964    0.49728   1.628 0.103495    
## bad_rep_expN              -0.15473    0.41822  -0.370 0.711399    
## bad_rep_expY               0.33971    0.39820   0.853 0.393596    
## dev_tknto_rec_Y            0.60860    0.32634   1.865 0.062194 .  
## dev_rec_chlng_fair_price   0.09209    0.28958   0.318 0.750469    
## dev_rec_chlng_usable      -0.18539    0.30092  -0.616 0.537841    
## dev_rec_chlng_datasec      0.21768    0.32591   0.668 0.504180    
## dev_rec_chlng_env_poll     0.19792    0.32119   0.616 0.537756    
## dev_rec_chlng_hard_find   -1.14618    0.45119  -2.540 0.011075 *  
## will_dev_recN              0.02867    0.36626   0.078 0.937615    
## will_dev_recY             -0.35005    0.46936  -0.746 0.455780    
## did_with_data_Y1           0.28373    0.30745   0.923 0.356089    
## did_with_device_econ1      1.31402    0.37878   3.469 0.000522 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 448.21  on 350  degrees of freedom
## Residual deviance: 346.64  on 306  degrees of freedom
## AIC: 436.64
## 
## Number of Fisher Scoring iterations: 13
# Backward stepwise selection starting from the full logistic model.
# step() compares candidate models by AIC (its default penalty, k = 2);
# trace = 0 suppresses the per-step progress log.
backward.model <- step(
  full.model,
  direction = "backward",
  trace = 0
)

# Coefficient table and fit statistics of the reduced model.
summary(backward.model)
## 
## Call:
## glm(formula = repair_try ~ gender + device_count_5_yr + dump_reason_break + 
##     slt_lack_tu + slt_lack_lang + slt_lack_parts + slt_lack_repairer + 
##     rprd_usage_chlng_fault + rpr_missing_trait_trust + rpr_missing_trait_gender + 
##     dev_tknto_rec_Y + dev_rec_chlng_hard_find + did_with_device_econ, 
##     family = binomial, data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4999  -0.9607   0.5115   0.8135   1.7348  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)              -1.25724    0.48891  -2.572  0.01013 * 
## genderWoman              -0.76450    0.29607  -2.582  0.00982 **
## device_count_5_yr         0.11214    0.05442   2.061  0.03932 * 
## dump_reason_break        -0.40389    0.26534  -1.522  0.12796   
## slt_lack_tu               0.90456    0.30414   2.974  0.00294 **
## slt_lack_lang             0.75308    0.37256   2.021  0.04324 * 
## slt_lack_parts            0.73106    0.27123   2.695  0.00703 **
## slt_lack_repairer         0.68465    0.30933   2.213  0.02688 * 
## rprd_usage_chlng_fault   -0.66105    0.29823  -2.217  0.02665 * 
## rpr_missing_trait_trust   0.64316    0.27960   2.300  0.02143 * 
## rpr_missing_trait_gender  0.70075    0.45174   1.551  0.12085   
## dev_tknto_rec_Y           0.53534    0.28816   1.858  0.06320 . 
## dev_rec_chlng_hard_find  -1.03492    0.39741  -2.604  0.00921 **
## did_with_device_econ1     1.01172    0.32764   3.088  0.00202 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 448.21  on 350  degrees of freedom
## Residual deviance: 367.01  on 337  degrees of freedom
## AIC: 395.01
## 
## Number of Fisher Scoring iterations: 5

\[ \begin{aligned} \log\left[ \frac { P( repair\_try = 1 ) }{ 1 - P( repair\_try = 1 ) } \right] &= -1.26 - 0.76(gender_{woman}) + 0.11(device\_count\_5\_yr)\ - \\ &\quad 0.40(dump\_reason\_break) + 0.90(lack\_of\_tutorial) + 0.75(language\_barrier\_of\_tutorial)\ + \\ &\quad 0.73(lack\_of\_parts) + 0.68(lack\_of\_good\_repairer) - 0.66(fear\_that\_repaired\_device\_faulty)\ + \\ &\quad 0.64(repairers\_lack\_trust) + 0.70(gender\_adversary\_of\_repairers) + 0.54(dev\_tknto\_rec\_Y)\ - \\ &\quad 1.03(dev\_rec\_chlng\_hard\_find) + 1.01(did\_with\_device\_econ_{1}) + \epsilon \end{aligned} \] The AIC of the full model was 436.64. After backward stepwise selection, it came down to 395.01.

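The 10-fold cross-validation result is below. Note that `cv.glm` is called with its default cost function (the mean squared prediction error), so the printed figure is \(100 \times (1 - \text{cross-validated MSE})\) rather than a classification accuracy: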

# 10-fold cross-validation (K = 10) of the stepwise-selected model.
# No `cost` argument is supplied, so cv.glm uses its documented default,
# the average squared error between the observed response and the fitted
# probability; delta[1] is the raw cross-validation estimate of that error.
# NOTE(review): the value printed below is therefore 100 * (1 - MSE), not a
# misclassification-based accuracy -- confirm this is the intended metric.
# NOTE(review): no seed is set in the visible code, so the fold assignment
# (and the printed value) may differ between runs.
cross_validated_model <- boot::cv.glm(
  data = df,
  glmfit = backward.model,
  K = 10
)
print(100 * (1 - cross_validated_model$delta[1]))
## [1] 80.53389

Let's also look at the ROC curve for the fitted logistic regression model:

# Intercept-only logistic model for the same outcome.
null.model <- glm(repair_try ~ 1, family = binomial(), data = df)

# Generate_RoC is defined outside this section.
# NOTE(review): direction = "B" presumably requests backward selection
# between the full and null models before the ROC curve is drawn -- confirm
# against the helper's definition.
Generate_RoC(
  full_model = full.model,
  null_model = null.model,
  df = df,
  direction = "B",
  outcome = "repair_try"
)
## [1] 351
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

With a good fit in the model, let's diagnose our model parameters for multicollinearity. If the VIF (Variance Inflation Factor) is \(>10\) for any predictor, we might be in trouble.

# Variance inflation factors of the stepwise-selected model, computed once
# and reused for the three summaries below. The labels read "VIF" (the
# original "IVF" was a typo); the echoed values are unchanged.
vif_values <- vif(backward.model)
cat("MIN VIF: ", min(vif_values))
## MIN VIF:  1.034216
cat("MAX VIF: ", max(vif_values))
## MAX VIF:  1.242712
cat("MEAN VIF: ", mean(vif_values))
## MEAN VIF:  1.124022

A coefficient \(>0\) means an odds ratio \(>1\), so that predictor shifts the outcome toward YES rather than NO.

gender_woman: \(-0.76\): Typically, women are less inclined toward repair.

device_count_5_yr: \(0.11\) The more devices you have had during the last 5 years, the more inclined you are to try to repair your own devices.

lack_of_tutorial: \(0.90\) If you feel that there is a lack of tutorials out there, then there is a solid chance that you at least tried to repair a device on your own, but probably did not succeed because of that lack.

language_barrier_of_tutorial: \(0.75\) This is also a strong predictor: if you feel that there is a lack of tutorials in your own language, then, for the same reason stated above, there is a solid chance that you at least tried to repair a device on your own, giving an odds ratio of \(e^{0.75} \approx 2.12\).

lack_of_parts: \(0.73\) Similar reason.

fear that trying to repair will lead to a faulty device: \(-0.66\) The negative log-odds say that this fear leads to fewer self-repair attempts.

repairers_lack_trust: \(0.64\) When you don't trust the repairers out there in the market, you try to do it on your own.

gender_adversary_of_repairers: \(0.70\) Gender adversity from repairers makes you more likely to try your own luck.

device ever taken to RECYCLER: \(0.54\) If you know the recycler community and have given or sold your devices to them, then you care less about the device's safety and usefulness, and you overcome the fear of trying a repair on your own.

Hard to Find RECYCLER: \(-1.03\) The effect above is easier to understand in light of this one. When you find it hard to find any recycler, your fear of losing the device's utility (log-odds \(-0.66\)) intensifies, and so you do not try to repair your devices on your own.

Have you done anything economic with your device (selling it, selling its parts, etc.)? \(1.01\). If you know your device still has some monetary value, you try to repair it on your own. [Probably a bit contradictory! How do we fix that? :P]

Let's look at the effects plot to better understand the individual effects of each predictor: