DAY44. Python Matplot (2)PandasPlot
line bar pie
# Visualizing pandas objects
# Usage) object.plot(kind='chart type', chart attributes)
# object : Series (1-D), DataFrame (2-D)
# kind = bar, pie, scatter, hist, etc.
import pandas as pd # object
import numpy as np # data
# 1. Basic chart visualization
# 1) Plotting a Series (1-D) object
# Ten random samples, labelled with the index values 0, 10, ..., 90.
ser = pd.Series(data=np.random.randn(10),
                index=np.arange(start=0, stop=100, step=10))
print(ser)

# Default chart: kind - line graph, colour - blue
ser.plot.line(color='b')
# 2) Plotting a DataFrame (2-D) object
# NOTE(review): the label 'fore' is presumably a typo for 'four'; it is kept
# unchanged here because the column name is runtime data.
df = pd.DataFrame(data=np.random.randn(10, 4),
                  columns=('one', 'two', 'three', 'fore'))
print(df)

# Default chart: line graph, one line per column
df.plot.line()

# Bar chart: vertical bars
df.plot.bar(title='bar chart')

# Bar chart: horizontal bars
df.plot.barh(title='barh chart')

# Bar chart: horizontal bars, stacked
df.plot.barh(title='barh chart', stacked=True)
# 2. Working with datasets
# 1) tips.csv
tips = pd.read_csv(filepath_or_buffer=r'C:\ITWILL\4_Python-2\data\tips.csv')
tips.info()
# Column summary recorded from tips.info():
#  0 total_bill 244 non-null float64
#  1 tip 244 non-null float64
#  2 sex 244 non-null object
#  3 smoker 244 non-null object
#  4 day 244 non-null object
#  5 time 244 non-null object
#  6 size 244 non-null int64

# Distinct values of the categorical variables
tips['day'].unique()   # ['Sun', 'Sat', 'Thur', 'Fri']
tips['size'].unique()  # [2, 3, 4, 1, 6, 5]

# Pie chart: number of rows per day of the week
cnt = tips['day'].value_counts()
cnt.plot.pie()
# Cross-tabulation: day of the week (day) vs group size (size)
tab = pd.crosstab(index=tips['day'], columns=tips['size'])
print(tab)
# Output recorded when this was written:
# size  1   2   3   4  5  6
# day
# Fri   1  16   1   1  0  0
# Sat   2  53  18  13  1  0
# Sun   0  39  15  18  3  1
# Thur  1  48   4   5  1  3
tab.shape  # (4, 6)
type(tab)  # pandas.core.frame.DataFrame

tab.plot(kind='barh', title='day vs size', stacked=True)

tab.columns  # inspect the column labels
tab.index  # inspect the row labels

# Row/column selection: DF.loc[row, col] (labels), DF.iloc[row, col] (positions)
# Select a 4x4 sub-table
tab_df = tab.loc[:, 2:5]  # label-based: the end label 5 is included
print(tab_df)

tab_df2 = tab.iloc[:, 1:5]  # position-based: the end position 5 is excluded
print(tab_df2)  # show the position-based result (previously printed tab_df again)

tab_df.plot(kind='barh', stacked=True, title='day vs size')
# Visualizing continuous variables
# kind = scatter, hist, kde, box, etc.
import pandas as pd #object
import matplotlib.pyplot as plt #API
# Load the dataset
# The directory is spelled '4_Python-2', matching the other data files read
# in this script (the previous '4_Python2' spelling was inconsistent).
# NOTE(review): confirm against the actual folder name on disk.
dataset = pd.read_csv(r'C:\ITWILL\4_Python-2\data\dataset.csv')
dataset.info()
# 1. Scatter plot
# Continuous variables: age vs price (ratio scale);
# each point is coloured by the value of the gender column.
plt.scatter(dataset['age'], dataset['price'], c=dataset['gender'])
plt.show()
# 2. hist, kde, box
type(dataset)  # pandas.core.frame.DataFrame
dir(dataset)  # list the attributes/methods that can be called
dataset.head()

# 1) Histogram
dataset['price'].plot.hist(title='histogram')

# 2) Kernel density estimate: histogram -> probability density curve
dataset['price'].plot.kde(title='kernel density plot')

# Several variables at once
dataset[['age', 'price']].plot.kde(title='kernel density plot')

# 3) Box plot
dataset[['age', 'price']].plot.box(title='boxplot')
dataset['price'].plot.box(title='boxplot')
# Scatter matrix and 3-D scatter plot
import pandas as pd # object
import matplotlib.pyplot as plt # chart
# 1. Scatter matrix
from pandas.plotting import scatter_matrix
# Load the iris dataset
iris = pd.read_csv(filepath_or_buffer=r'C:\ITWILL\4_Python-2\data\iris.csv')
cols = iris.columns.tolist()

# Select the x variables: the first four columns
x = iris[cols[:4]]
print(x.head(n=5))

# Scatter matrix of the selected columns
scatter_matrix(frame=x)
plt.show()
# 2. 3-D scatter plot
from mpl_toolkits.mplot3d import Axes3D
# Build the x, y and z axis data from the first three columns
col_x = iris[cols[0]]
col_y = iris[cols[1]]
col_z = iris[cols[2]]

# Colour data: one integer code per row, by flower species
# ('setosa' -> 1, 'versicolor' -> 2, anything else -> 3)
cdata = [
    1 if s == 'setosa' else 2 if s == 'versicolor' else 3
    for s in iris['Species']
]
print(cdata)

fig = plt.figure()
chart = fig.add_subplot(projection='3d')  # 3-D axes

chart.scatter(col_x, col_y, col_z, c=cdata)
chart.set(xlabel='Sepal.Length',
          ylabel='Sepal.Width',
          zlabel='Petal.Length')
plt.show()
# timeSeriesPlot
# Time series
# 1. Converting the date format (multilingual)
# 2. Time-series data / visualization
# 3. Moving average
from datetime import datetime #class : ๋ ์งํ์ ์์ /๊ฐ์ฒด
import pandas as pd #csv fire
import matplotlib.pyplot as plt #chart
# Load the csv file
cospi = pd.read_csv(filepath_or_buffer=r'C:\ITWILL\4_Python-2\data\cospi.csv')
cospi.info()  # 0 Date 247 non-null object
cospi.head()  # 26-Feb-16 -> 2016-02-26

# 1. Converting the date format (multilingual)
Date = cospi['Date']  # extract the column
len(Date)  # 247

# Parse each 'day-month abbreviation-2 digit year' string,
# e.g. 26-Feb-16 -> 2016-02-26
kDate = [datetime.strptime(text, "%d-%b-%y") for text in Date]

# Type conversion: replace the string (object) column with datetime values
cospi['Date'] = kDate
cospi.info()
# 2. Time-series data / visualization
# 1) Trend graph of a single column (e.g. a stock price trends over time)
# Basic line graph; the x axis is the row index.
cospi['High'].plot.line(title='Trend line of High column')

# 2) Trend graph of two columns
cospi[['High', 'Low']].plot.line(title='Trend line of High and Low column')

# 3) Time-series data: switch the x axis from the row index to Date
new_cospi = cospi.set_index(keys='Date')
new_cospi.head()
new_cospi['High'].plot.line(title='Trend line of High column')
# Selecting by year, by month, and by date range
# Rows are selected with .loc: on pandas 2.x a plain frame['2016'] is treated
# as a column lookup and raises KeyError for a date string.
new_cospi.loc['2016']
new_cospi.loc['2016-02']
# NOTE(review): the later month comes first, which presumably matches a Date
# index sorted newest-first; confirm against the data.
new_cospi.loc['2015-09':'2015-07']

# Trend lines of the 2016 stock prices
new_cospi_HL = new_cospi[['High', 'Low']]
new_cospi_HL.loc['2016'].plot(title='Trend line of 2016year')
new_cospi_HL.loc['2016-02'].plot(title='Trend line of 2016-02')
# 3. Moving average (smoothing): mean over a fixed-size window that moves
#    along the series

# 5-day mean
roll_mean5 = new_cospi['High'].rolling(window=5, center=False).mean()

# 10-day mean
roll_mean10 = new_cospi['High'].rolling(window=10, center=False).mean()

# 20-day mean
roll_mean20 = new_cospi['High'].rolling(window=20, center=False).mean()

# Plot the raw series and the rolling means together on one subplot
fig = plt.figure(figsize=(12, 5))
chart = fig.add_subplot()

chart.plot(new_cospi['High'], color='blue', label='High columns')
chart.plot(roll_mean5, color='red', label='5 days rolling mean')
chart.plot(roll_mean10, color='green', label='10 days rolling mean')
chart.plot(roll_mean20, color='orange', label='20 days rolling mean')
chart.legend(loc='best')
plt.show()