#Application of the regression model
# Covariate profile of the example individual (uncentered covariates).
RaceHL <- 1 # is Hispanic/Latino
RaceW <- 0
PH <- 16.00
PS <- 4.00

# Linear predictor (log relative risk) built from the hard-coded model
# coefficients, then exponentiated to obtain the relative risk.
Log_overallrisk <- .121 * RaceHL - 0.044 * RaceW - 0.109 * PH + 0.14135 * PS
print(exp(Log_overallrisk))
# Risk of this individual turning over is 0.347. This individual is 65.27% less likely to quit when compared to an individual
# with scores of 0 on each of the covariates in the model. But in reality, Pay_hourly and Pay_sat cannot be 0.
# Therefore, we mean-center the covariates.
# Mean-center the continuous covariates so that a value of 0 corresponds to the
# observed mean. Plain subtraction keeps each new column an ordinary numeric
# vector; scale() would instead store a one-column matrix (with attributes)
# inside the data frame.
survdat$c_Pay_hourly <- survdat$Pay_hourly -
  mean(survdat$Pay_hourly, na.rm = TRUE)
survdat$c_Pay_sat <- survdat$Pay_sat - mean(survdat$Pay_sat, na.rm = TRUE)

# Also, change the reference group from Black to HispanicLatino by listing it
# as the first factor level. Any Race value not listed here becomes NA.
survdat$HL_Race <- factor(
  survdat$Race,
  levels = c("Hispaniclatino", "Black", "White")
)

# Fit the Cox model on rows that are complete for every model variable.
# The tidyr function is drop_na() (there is no dropna()); it is
# namespace-qualified because library(tidyr) is only attached further down
# the script.
cox_reg3 <- coxph(
  Surv(LOS, censored) ~ HL_Race + c_Pay_hourly + c_Pay_sat,
  data = tidyr::drop_na(survdat, LOS, censored, Race, Pay_hourly, Pay_sat)
)
summary(cox_reg3)
# Covariate profile of the same example individual under the re-referenced,
# mean-centered model: both race dummies are 0 (HispanicLatino is the
# reference level) and the continuous covariates are expressed as deviations
# from their means.
RaceB <- 0
RaceW <- 0
PH <- 16.00 - mean(survdat$Pay_hourly, na.rm = TRUE) # na.rm will drop the missing data
PS <- 4.00 - mean(survdat$Pay_sat, na.rm = TRUE)

# With HispanicLatino as the reference group, the race coefficients must be
# re-expressed from the Black-reference estimates (0.121 for HispanicLatino,
# -0.044 for White):
#   Black vs HispanicLatino = -0.121
#   White vs HispanicLatino = -0.044 - 0.121 = -0.165
# Mean-centering leaves the Pay_hourly and Pay_sat slopes unchanged.
# NOTE(review): values are derived from rounded estimates -- confirm against
# summary(cox_reg3).
Log_overallrisk <- -0.121 * RaceB - 0.165 * RaceW - 0.109 * PH + 0.14135 * PS
print(exp(Log_overallrisk))
# Risk of this individual turning over is 0.851. This individual is 14.8% less likely to quit when compared to an individual
# with scores of 0 on the dummy variables and mean Pay_hourly and Pay_sat.
library(tidyr)
# Compare the two previously fitted models (cox_reg and cox_reg1 are defined
# outside this section); the number of observations should be the same in both.
anova(cox_reg, cox_reg1)