105. 파이널 프로젝트 (11) Random Forest, Decision Tree 모델 만들기
0. 모듈 임포트
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as mt
import matplotlib.pyplot as plt
import seaborn as sns
1. 데이터 불러오기
# Load the movie dataset into a DataFrame.
# The path is a placeholder (it reads "path name\file name.csv"); replace it
# with the real location of the CSV before running. It must stay on a single
# line: a raw string literal cannot span physical lines.
movie = pd.read_csv(r'경로명\파일명.csv')
movie.info()  # print column dtypes and non-null counts
movie.isna().sum()  # count missing values per column
# movie = movie.dropna(axis=0)  # optional: drop every row that has a missing value
movie
2. 타겟(y)변수, x변수 지정
# Name of the target column, kept in one place so the selection of y and the
# removal from x cannot drift apart.
# NOTE(review): restored from a mis-encoded literal (reads roughly as
# "surpassed ten million admissions") — confirm it matches the CSV header exactly.
TARGET_COLUMN = '천만관객돌파여부'

# Target: a single column selected from the frame.
y = movie[TARGET_COLUMN]
# y = y.to_numpy().ravel()  # alternative: use ravel to get a flat 1-D array
y
# Features: every column except the target.
x = movie.drop(columns=[TARGET_COLUMN])
x
3. 훈련셋, 검정셋 나누기
# Hold out 30% of the rows as the test split; the fixed seed keeps the
# partition identical from run to run.
split_options = {"test_size": 0.3, "random_state": 123}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)
4. RandomForest 구현
# Random forest with library-default hyperparameters; only the seed is pinned.
forest_params = {"random_state": 123}
model = RandomForestClassifier(**forest_params)
# Train on the training split only.
model.fit(X=x_train, y=y_train)
5. 결과 확인
# Predict on the held-out split, then print the training accuracy followed by
# the test-set metrics (two decimals each) and the confusion matrix.
y_pred = model.predict(x_test)
train_accuracy = model.score(x_train, y_train)
print('Train_Accuracy: ', train_accuracy, '\n')

# Evaluate the four scalar metrics in a fixed order against the same predictions.
accuracy, recall, precision, f1_score = (
    scorer(y_test, y_pred)
    for scorer in (
        mt.accuracy_score,
        mt.recall_score,
        mt.precision_score,
        mt.f1_score,
    )
)
matrix = mt.confusion_matrix(y_test, y_pred)

for label, value in (
    ('Accuracy: ', accuracy),
    ('Recall: ', recall),
    ('Precision: ', precision),
    ('F1_score: ', f1_score),
):
    print(label, f"{value:.2f}")
print('Confusion Matrix:', '\n', matrix)
Train_Accuracy: 1.0
Accuracy: 0.93
Recall: 0.93
Precision: 0.93
F1_score: 0.93
Confusion Matrix:
[[14 1]
[ 1 14]]
6. 시각화
# Pair each importance score from the fitted model with its training column
# name, then order the scores from largest to smallest.
ftr_importances_values = model.feature_importances_
ftr_importances = pd.Series(data=ftr_importances_values, index=x_train.columns)
ftr_top = ftr_importances.sort_values(ascending=False)

# Font setup: "Malgun Gothic" is set for matplotlib and again for seaborn
# (presumably so Korean column names render — TODO confirm); unicode_minus is
# switched off alongside it.
korean_font = "Malgun Gothic"
plt.rc("font", family=korean_font)
sns.set(style="white", font=korean_font, rc={"axes.unicode_minus": False})

# Horizontal bar chart: importance on the x axis, feature name on the y axis.
plt.figure(figsize=(8, 6))
plt.title('Feature Importances')
bar_lengths, bar_labels = ftr_top, ftr_top.index
sns.barplot(x=bar_lengths, y=bar_labels, palette="Set3")
sns.despine(left=True, bottom=True)
plt.show()
# 0. 모듈 임포트 (import modules)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import sklearn.metrics as mt
from sklearn.tree import export_graphviz #์๊ฐํ
from graphviz import Source #์๊ฐํ
1. 데이터 불러오기
# Load the movie dataset into a DataFrame.
# This is an absolute path on the original author's Windows desktop; point it
# at your own copy of the CSV. The literal must stay on a single line: a raw
# string cannot span physical lines.
# NOTE(review): folder and file names were restored from mis-encoded text —
# confirm the exact spelling and spacing against the real file.
movie = pd.read_csv(r'C:\Users\KIM YOON\Desktop\21.12.27~22.01.07 프로젝트\변수최종(상관관계높은).csv')
movie.info()  # print column dtypes and non-null counts
movie.isna().sum()  # count missing values per column
# movie = movie.dropna(axis=0)  # optional: drop every row that has a missing value
movie
2. 타겟(y)변수, x변수 지정
# Name of the target column, kept in one place so the selection of y and the
# removal from x cannot drift apart.
# NOTE(review): restored from a mis-encoded literal (reads roughly as
# "surpassed ten million admissions") — confirm it matches the CSV header exactly.
TARGET_COLUMN = '천만관객돌파여부'

# Target: a single column selected from the frame.
y = movie[TARGET_COLUMN]
# y = y.to_numpy().ravel()  # alternative: use ravel to get a flat 1-D array
y
# Features: every column except the target.
x = movie.drop(columns=[TARGET_COLUMN])
x
3. 훈련셋, 검정셋 나누기
# Hold out 30% of the rows as the test split; the fixed seed keeps the
# partition identical from run to run.
split_options = {"test_size": 0.3, "random_state": 123}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)
4. Decision Tree 모델 구현
# Decision tree with library-default hyperparameters; only the seed is pinned.
tree_params = {"random_state": 123}
model = DecisionTreeClassifier(**tree_params)
# Train on the training split only.
model.fit(X=x_train, y=y_train)
5. 결과 확인
# Predict on the held-out split, then print the training accuracy followed by
# the test-set metrics (two decimals each) and the confusion matrix.
y_pred = model.predict(x_test)
train_accuracy = model.score(x_train, y_train)
print('Train_Accuracy: ', train_accuracy, '\n')

# Evaluate the four scalar metrics in a fixed order against the same predictions.
accuracy, recall, precision, f1_score = (
    scorer(y_test, y_pred)
    for scorer in (
        mt.accuracy_score,
        mt.recall_score,
        mt.precision_score,
        mt.f1_score,
    )
)
matrix = mt.confusion_matrix(y_test, y_pred)

for label, value in (
    ('Accuracy: ', accuracy),
    ('Recall: ', recall),
    ('Precision: ', precision),
    ('F1_score: ', f1_score),
):
    print(label, f"{value:.2f}")
print('Confusion Matrix:', '\n', matrix)
Train_Accuracy: 1.0
Accuracy: 0.90
Recall: 0.93
Precision: 0.88
F1_score: 0.90
Confusion Matrix:
[[13 2]
[ 1 14]]
6. 시각화
* 트리 시각화를 위한 export_graphviz 모듈 임포트 (이를 위해 graphviz 별도 설치 필요)
# Labels shown for each feature in the rendered tree; export_graphviz needs
# exactly one name per feature the model was trained on.
# NOTE(review): these hard-coded English names replace the frame's own column
# names — confirm they are listed in the same order as x.columns.
x_list = ['screen', 'cost', 'naver_score', 'watcha_score', 'news', 'tv', 'preview', 'google_t', 'director', 'actor']

# export_graphviz indexes class_names by class position, so it must be a list
# of strings in ascending class order. model.classes_ already has that order;
# a single string such as str([...]) would be indexed character by character.
y_list = [str(label) for label in model.classes_]

# Write the fitted tree to a Graphviz DOT file. Because out_file is given,
# the function returns None, so `graph` holds no DOT text.
graph = export_graphviz(
    model,
    out_file='tree_graph.dot',
    feature_names=x_list,
    class_names=y_list,
    filled=True,              # colour the nodes
    rounded=True,             # rounded node corners
    special_characters=True,  # special characters
    impurity=True,            # show the Gini impurity
)
dot file read
# Read back the DOT description that export_graphviz wrote to tree_graph.dot.
# The context manager closes the handle as soon as the text is read, and the
# explicit UTF-8 encoding matches how scikit-learn writes the file instead of
# depending on the platform default.
with open("tree_graph.dot", encoding="utf-8") as file:
    dot_graph = file.read()
dot file 시각화
# Wrap the DOT text in a graphviz Source object. The result is not assigned;
# presumably it is the last expression of a notebook cell so the tree is
# displayed inline — TODO confirm.
Source(dot_graph)
참고
graphviz 설치/실행 에러: https://free-eunb.tistory.com/14