
### Linear Regression Using lm()  

data("swiss") 

dat < swiss 

linear_model < lm(Fertility ~ ., data = dat) 

summary(linear_model) 





# Call: 

# lm(formula = Fertility ~ ., data = dat) 

# 

# Residuals: 

# Min 1Q Median 3Q Max 

# 15.2743 5.2617 0.5032 4.1198 15.3213 

# 

# Coefficients: 

# Estimate Std. Error t value Pr(>t) 

# (Intercept) 66.91518 10.70604 6.250 1.91e07 *** 

# Agriculture 0.17211 0.07030 2.448 0.01873 * 

# Examination 0.25801 0.25388 1.016 0.31546 

# Education 0.87094 0.18303 4.758 2.43e05 *** 

# Catholic 0.10412 0.03526 2.953 0.00519 ** 

# Infant.Mortality 1.07705 0.38172 2.822 0.00734 ** 

#  

# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 

# 

# Residual standard error: 7.165 on 41 degrees of freedom 

# Multiple Rsquared: 0.7067, Adjusted Rsquared: 0.671 

# Fstatistic: 19.76 on 5 and 41 DF, pvalue: 5.594e10 





### Using Linear Algebra  



y < matrix(dat$Fertility, nrow = nrow(dat)) 

X < cbind(1, as.matrix(x = dat[,1])) 

colnames(X)[1] < "(Intercept)" 



# N x k matrix 

N < nrow(X) 

k < ncol(X)  1 # number of predictor variables (ergo, excluding Intercept column) 



# Estimated Regression Coefficients 

beta_hat < solve(t(X)%*%X)%*%(t(X)%*%y) 



# Variance of outcome variable = Variance of residuals 

sigma_sq < residual_variance < (Nk1)^1 * sum((y  X %*% beta_hat)^2) 

residual_std_error < sqrt(residual_variance) 



# Variance and Std. Error of estimated coefficients of the linear model 

var_betaHat < sigma_sq * solve(t(X) %*% X) 

coeff_std_errors < sqrt(diag(var_betaHat)) 



# t values of estimates are ratio of estimated coefficients to std. errors 

t_values < beta_hat / coeff_std_errors 



# pvalues of tstatistics of estimated coefficeints 

p_values_tstat < 2 * pt(abs(t_values), Nk, lower.tail = FALSE) 



# assigning R's significance codes to obtained pvalues 

signif_codes_match < function(x){ 

ifelse(x <= 0.001,"***", 

ifelse(x <= 0.01,"**", 

ifelse(x < 0.05,"*", 

ifelse(x < 0.1,"."," ")))) 



} 

signif_codes < sapply(p_values_tstat, signif_codes_match) 



# Rsquared and Adjusted Rsquared (refer any econometrics / statistics textbook) 

R_sq < 1  (Nk1)*residual_variance / (N*mean((y  mean(y))^2)) 

R_sq_adj < 1  residual_variance / ((N/(N1))*mean((y  mean(y))^2)) 



# Residual sum of squares (RSS) for the full model 

RSS < (Nk1)*residual_variance 

# RSS for the partial model with only intercept (equal to mean), ergo, TSS 

RSS0 < TSS < sum((y  mean(y))^2) 



# F statistic based on RSS for full and partial models 

# k = degress of freedom of partial model 

# N  k  1 = degress of freedom of full model 

F_stat < ((RSS0  RSS)/k) / (RSS/(Nk1)) 



# pvalues of the F statistic 

p_value_F_stat < pf(F_stat, df1 = k, df2 = Nk1, lower.tail = FALSE) 



# stitch the main results toghether 

lm_results < as.data.frame(cbind(beta_hat, coeff_std_errors, 

t_values, p_values_tstat, signif_codes)) 

colnames(lm_results) < c("Estimate","Std. Error","t value","Pr(>t)","") 





### Print out results of all relevant calcualtions  





print(lm_results) 

cat("Residual standard error: ", 

round(residual_std_error, digits = 3), 

" on ",Nk1," degrees of freedom", 

"\nMultiple Rsquared: ",R_sq," Adjusted Rsquared: ",R_sq_adj, 

"\nFstatistic: ",F_stat, " on ",k1," and ",Nk1, 

" DF, pvalue: ", p_value_F_stat,"\n") 



# Estimate Std. Error t value Pr(>t) 

# (Intercept) 66.9151816789654 10.7060375853301 6.25022854119771 1.73336561301153e07 *** 

# Agriculture 0.172113970941457 0.0703039231786469 2.44814177018405 0.0186186100433133 * 

# Examination 0.258008239834722 0.253878200892098 1.01626779663678 0.315320687313066 

# Education 0.870940062939429 0.183028601571259 4.75849159892283 2.3228265226988e05 *** 

# Catholic 0.104115330743766 0.035257852536169 2.95296858017545 0.00513556154915653 ** 

# Infant.Mortality 1.07704814069103 0.381719650858061 2.82156849475775 0.00726899472564356 ** 



# Residual standard error: 7.165 on 41 degrees of freedom 

# Multiple Rsquared: 0.706735 Adjusted Rsquared: 0.670971 

# Fstatistic: 19.76106 on 4 and 41 DF, pvalue: 5.593799e10 



