8.5 Logistic Regression
Ordinary linear regression can solve regression problems, but it is not well suited to classification problems, because its predictions (outputs) are unbounded.
For example, in a problem of classifying into 0 and 1, if the prediction comes out as 2, there is no way to tell whether it should be classified as 0 or 1.
Logistic regression is a regression method that constrains the prediction (output) to lie between 0 and 1.
Let's transform the model so that the output is restricted to a bounded range:

y = 1 / (1 + e^(-(W^T x + b)))

Here W^T x + b is the linear prediction. As W^T x + b -> inf, y -> 1, and as W^T x + b -> -inf, y -> 0, so we have succeeded in bounding the output range!
Taking logs and rearranging so that only W^T x + b remains on one side:

log(y / (1 - y)) = W^T x + b

Let π(x) be the probability that y is 1 given the feature x, i.e. π(x) = P(y=1 | X=x). Then

log(π(x) / (1 - π(x))) = W^T x + b

W > 0 : the sigmoid curve increases (slopes up to the right)
W < 0 : the sigmoid curve decreases (slopes down to the right)
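A quick numerical check of this relationship, as a minimal NumPy sketch (the sigmoid helper is defined here for illustration, not taken from the text):

import numpy as np

def sigmoid(z):
    # maps any real z = W^T x + b into the interval (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

z = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
y = sigmoid(z)
print(y)                    # near 0 for very negative z, near 1 for very positive z
print(np.log(y / (1 - y)))  # the log-odds recover z (up to floating-point error)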
from sklearn import datasets
raw_cancer = datasets.load_breast_cancer()
X = raw_cancer.data
y = raw_cancer.target
from sklearn.model_selection import train_test_split
X_tn, X_te, y_tn, y_te = train_test_split(X,y,random_state=0)
from sklearn.preprocessing import StandardScaler
std_scale = StandardScaler()
std_scale.fit(X_tn)
X_tn_std = std_scale.transform(X_tn)
X_te_std = std_scale.transform(X_te)
from sklearn.linear_model import LogisticRegression
# As with lasso and ridge in linear regression, the regularization term can be specified via the penalty parameter. This classifier uses the l2 penalty (ridge-style); an l1 example is sketched at the end of this section.
clf_logi_l2 = LogisticRegression(penalty='l2')
clf_logi_l2.fit(X_tn_std,y_tn)
# learned coefficients of the logistic regression
print(clf_logi_l2.coef_)
# learned intercept (bias) of the logistic regression
print(clf_logi_l2.intercept_)
# prediction on the test data (in this case, classification)
pred_LogReg = clf_logi_l2.predict(X_te_std)
print(pred_LogReg)
# predicted probability for each class
# there are 2 classes, so the result will have 2 columns
pred_prob = clf_logi_l2.predict_proba(X_te_std)
print(pred_prob)
[[-0.29792942 -0.58056355 -0.3109406 -0.377129 -0.11984232 0.42855478
-0.71131106 -0.85371164 -0.46688191 0.11762548 -1.38262136 0.0899184
-0.94778563 -0.94686238 0.18575731 0.99305313 0.11090349 -0.3458275
0.20290919 0.80470317 -0.91626377 -0.91726667 -0.8159834 -0.86539197
-0.45539191 0.10347391 -0.83009341 -0.98445173 -0.5920036 -0.61086989]]
[0.02713751]
[0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1
0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0]
[[9.98638613e-01 1.36138656e-03]
[3.95544804e-02 9.60445520e-01]
[1.30896362e-03 9.98691036e-01]
[1.24473354e-02 9.87552665e-01]
[2.44132101e-04 9.99755868e-01]
[4.50491513e-03 9.95495085e-01]
[1.13985968e-04 9.99886014e-01]
[1.82475894e-03 9.98175241e-01]
[9.67965506e-05 9.99903203e-01]
[1.75222878e-06 9.99998248e-01]
[1.76572612e-01 8.23427388e-01]
[8.24119135e-02 9.17588087e-01]
[9.66067493e-06 9.99990339e-01]
[5.39343196e-01 4.60656804e-01]
[3.98187854e-01 6.01812146e-01]
[9.95762760e-01 4.23724017e-03]
[2.75612083e-03 9.97243879e-01]
[9.99997097e-01 2.90271401e-06]
[9.99926506e-01 7.34935682e-05]
[9.99999997e-01 2.78313939e-09]
[9.98738365e-01 1.26163489e-03]
[9.81405399e-01 1.85946008e-02]
[1.77902039e-02 9.82209796e-01]
[9.65876713e-04 9.99034123e-01]
[9.99464578e-01 5.35421808e-04]
[6.73385015e-04 9.99326615e-01]
[5.50833875e-05 9.99944917e-01]
[9.69828919e-01 3.01710813e-02]
[1.62119075e-03 9.98378809e-01]
[9.99997821e-01 2.17867101e-06]
[6.00571253e-05 9.99939943e-01]
[9.99954808e-01 4.51921300e-05]
[1.09252006e-01 8.90747994e-01]
[9.97255978e-01 2.74402243e-03]
[4.51047979e-06 9.99995490e-01]
[9.97449456e-01 2.55054412e-03]
[1.97830173e-02 9.80216983e-01]
[9.99571529e-01 4.28470822e-04]
[8.45566258e-03 9.91544337e-01]
[9.99487912e-01 5.12087502e-04]
[9.42409583e-01 5.75904174e-02]
[8.34700429e-05 9.99916530e-01]
[9.32505814e-01 6.74941855e-02]
[8.11944408e-05 9.99918806e-01]
[6.08911689e-02 9.39108831e-01]
[9.99999999e-01 1.17373572e-09]
[1.00967748e-06 9.99998990e-01]
[1.48182234e-02 9.85181777e-01]
[6.33630458e-04 9.99366370e-01]
[9.99927519e-01 7.24813084e-05]
[9.99989528e-01 1.04724511e-05]
[8.04262948e-01 1.95737052e-01]
[9.99965014e-01 3.49860375e-05]
[1.36691079e-03 9.98633089e-01]
[1.95330244e-03 9.98046698e-01]
[5.74609838e-04 9.99425390e-01]
[1.05063052e-03 9.98949369e-01]
[7.96089471e-03 9.92039105e-01]
[1.00288029e-02 9.89971197e-01]
[9.99999999e-01 1.44073341e-09]
[9.97609027e-01 2.39097260e-03]
[9.99257870e-01 7.42129950e-04]
[3.14309030e-05 9.99968569e-01]
[4.40044150e-03 9.95599559e-01]
[9.99897373e-01 1.02627439e-04]
[1.52976144e-01 8.47023856e-01]
[1.00000000e+00 2.39185116e-13]
[9.99998777e-01 1.22317020e-06]
[9.99999046e-01 9.53579837e-07]
[7.96239235e-04 9.99203761e-01]
[3.87033734e-01 6.12966266e-01]
[9.99993469e-01 6.53125942e-06]
[2.97085842e-03 9.97029142e-01]
[8.09412134e-01 1.90587866e-01]
[9.99996998e-01 3.00240009e-06]
[1.75950117e-02 9.82404988e-01]
[4.94325863e-05 9.99950567e-01]
[3.51047770e-02 9.64895223e-01]
[4.25841119e-04 9.99574159e-01]
[2.09232609e-05 9.99979077e-01]
[9.82374564e-01 1.76254356e-02]
[1.00000000e+00 3.57855006e-10]
[9.99988747e-01 1.12526453e-05]
[5.94724730e-05 9.99940528e-01]
[9.62731634e-01 3.72683662e-02]
[1.69452548e-03 9.98305475e-01]
[6.14966533e-05 9.99938503e-01]
[6.36886875e-06 9.99993631e-01]
[9.99902779e-01 9.72205364e-05]
[1.00000000e+00 8.14423797e-11]
[3.47458432e-05 9.99965254e-01]
[5.53589378e-01 4.46410622e-01]
[6.91462937e-01 3.08537063e-01]
[9.99996851e-01 3.14924112e-06]
[2.01951834e-03 9.97980482e-01]
[2.39759190e-03 9.97602408e-01]
[9.99999992e-01 7.92006333e-09]
[1.03400237e-02 9.89659976e-01]
[9.23218910e-03 9.90767811e-01]
[9.80048490e-04 9.99019952e-01]
[5.45753731e-09 9.99999995e-01]
[3.09034901e-03 9.96909651e-01]
[6.22819445e-03 9.93771806e-01]
[1.49494565e-01 8.50505435e-01]
[9.99994787e-01 5.21292981e-06]
[6.02188244e-04 9.99397812e-01]
[9.99995658e-01 4.34219020e-06]
[9.49795077e-02 9.05020492e-01]
[3.27428663e-01 6.72571337e-01]
[1.72350019e-02 9.82764998e-01]
[3.75686888e-02 9.62431311e-01]
[9.99975711e-01 2.42887910e-05]
[9.99911399e-01 8.86014791e-05]
[8.65663331e-02 9.13433667e-01]
[8.21398481e-04 9.99178602e-01]
[2.45946373e-02 9.75405363e-01]
[1.43898490e-01 8.56101510e-01]
[1.58128486e-03 9.98418715e-01]
[1.79682971e-02 9.82031703e-01]
[1.18803803e-03 9.98811962e-01]
[1.55728346e-02 9.84427165e-01]
[1.43822197e-03 9.98561778e-01]
[3.86829219e-01 6.13170781e-01]
[2.65232841e-02 9.73476716e-01]
[9.99999918e-01 8.17382381e-08]
[1.28424726e-01 8.71575274e-01]
[4.67709202e-01 5.32290798e-01]
[2.58725940e-04 9.99741274e-01]
[3.25269018e-05 9.99967473e-01]
[4.00075207e-05 9.99959992e-01]
[9.99901036e-01 9.89636008e-05]
[1.27248974e-04 9.99872751e-01]
[2.66411581e-04 9.99733588e-01]
[2.13163719e-01 7.86836281e-01]
[2.92511631e-02 9.70748837e-01]
[2.37309476e-05 9.99976269e-01]
[5.09465728e-01 4.90534272e-01]
[6.17881971e-01 3.82118029e-01]
[1.00000000e+00 1.46648090e-12]
[8.41453252e-05 9.99915855e-01]
[1.58701592e-03 9.98412984e-01]
[1.26424968e-03 9.98735750e-01]
[9.99999994e-01 5.81805301e-09]]
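The discrete predictions and the probabilities above are consistent with each other: for this binary problem, predict effectively picks the class whose predict_proba column is largest (equivalently, it thresholds the positive-class probability at 0.5). A minimal check, reusing the variables defined above (this assumes clf_logi_l2.classes_ is [0, 1], as it is for this dataset):

import numpy as np
# the column index with the highest probability in each row...
pred_from_proba = np.argmax(pred_prob, axis=1)
# ...should match the output of predict on every test sample
print(np.array_equal(pred_from_proba, pred_LogReg))  # expected: True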
# Evaluating precision
from sklearn.metrics import precision_score
precision = precision_score(y_te, pred_LogReg)
# result: 96.7 % precision
print(precision)
# Checking confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_te, pred_LogReg)
print(cm)
# Checking classification report
from sklearn.metrics import classification_report
cr = classification_report(y_te, pred_LogReg)
print(cr)
0.9666666666666667
[[50 3]
[ 3 87]]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        53
           1       0.97      0.97      0.97        90

    accuracy                           0.96       143
   macro avg       0.96      0.96      0.96       143
weighted avg       0.96      0.96      0.96       143
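As mentioned in the penalty comment earlier, an l1 (lasso-style) constraint can be used instead of l2. A minimal sketch, reusing the scaled training data from above (note that scikit-learn's l1 penalty requires a compatible solver such as 'liblinear' or 'saga'):

from sklearn.linear_model import LogisticRegression

clf_logi_l1 = LogisticRegression(penalty='l1', solver='liblinear')
clf_logi_l1.fit(X_tn_std, y_tn)
# the l1 penalty tends to drive some coefficients exactly to zero (feature selection)
print(clf_logi_l1.coef_)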