NumPy 패키지
수치 과학용 데이터 처리 목적으로 사용
선형대수(벡터, 행렬) 연산 관련 함수 제공
N차원 배열, 선형대수 연산, 고속 연산
수학/통계 함수 제공
indexing/slicing
broadcast 연산 : 서로 다른 차원 간의 연산
import numpy as np
1. list ๋ฐฐ์ด vs numpy ๋ฐฐ์ด
1) list ๋ฐฐ์ด
lst = [1, 2, 3, 3.5] #์ ์ํ๊ณผ ์ค์ํ
lst #[1, 2, 3, 3.5] -> ๋ค์ํ ์๋ฃํ
lst * 3 #[1, 2, 3, 3.5, 1, 2, 3, 3.5, 1, 2, 3, 3.5]
#lst * 0.5 = TypeError
list + for
calc = [v*0.5 for v in lst]
calc #[0.5, 1.0, 1.5, 1.75]
sum(lst) #์ธ๋ถํจ์ ์ด์ฉ
2) numpy ๋ฐฐ์ด
arr = np.array(lst) #list -> numpyํ ๋ณํ
arr #array([1. , 2. , 3. , 3.5]) -> ๋์ผํ ์๋ฃํ
arr * 0.5 #์ฐ์ ์ฐ์ฐ : array([0.5 , 1. , 1.5 , 1.75])
arr.sum() #자체 객체 지원 함수(method)
type(arr) #numpy.ndarray
dir(arr)
arr.size #4
arr.shape #(4,)
arr.ndim #1
arr2 = np.array([10, 20, 30, '40'])
arr2 #array(['10', '20', '30', '40'], dtype='<U11')
2. array() : ๋ค์ฐจ์ ๋ฐฐ์ด ์์ฑ ํจ์
1) ๋จ์ผ list -> 1์ฐจ์ ๋ฐฐ์ด
lst1d = [3, 5.3, 4, 7]
list -> array
arr1d = np.array(lst1d)
arr1d.shape #(4,)
arr1d.ndim #1 -> 1์ฐจ์
2) ์ค์ฒฉ list -> 2์ฐจ์ ๋ฐฐ์ด
lst2d = [[1, 2, 3, 4], [5, 6, 7, 8]]
print(lst2d) #[[1, 2, 3, 4], [5, 6, 7, 8]]
list -> array
arr2d = np.array(lst2d)
print(arr2d)
[[1 2 3 4] -> 1ํ
[5 6 7 8]] -> 2ํ
arr2d.shape #(2, 4)
arr2d.ndim #2
arr2d.size #8
3. broadcast ์ฐ์ฐ
์์ ์ฐจ์์ด ํฐ ์ฐจ์์ผ๋ก ๋์ด๋ ํ 1:1 ์ฐ์ฐ
1) scalar(0) vs vector(1)
arr1d * 0.5 #array([1.5 , 2.65, 2. , 3.5 ])
2) scalar(0) vs matrix(2)
arr2d * 0.5
array([[0.5, 1. , 1.5, 2. ],
[2.5, 3. , 3.5, 4. ]])
3) vector(1) vs matrix(2)
arr1d.shape #(4,)
arr2d.shape #(2, 4)
arr1d * arr2d
array([[ 3. , 10.6, 12. , 28. ],
[15. , 31.8, 28. , 56. ]])
print(arr1d) #[3. 5.3 4. 7. ]
mu = arr1d.mean() #4.825
diff = (arr1d - mu)**2 #broadcast ์ฐ์ฐ array([3.330625, 0.225625, 0.680625, 4.730625])
type(diff) #numpy.ndarray
var = sum(diff) / arr1d.size #๋ถ์ฐ 2.241875
var = sum(diff) / (arr1d.size -1) #list๋ฐฐ์ด๋ณด๋ค numpy๊ฐ ๊ณ์ฐ์์์ ๊ฐํธํ๋ค
4. zeros() ํจ์ vs ones() ํจ์
zeros(0ํ๋ ฌ) : ๋ชจ๋ ๊ฐ์ 0์ผ๋ก ์ด๊ธฐํํ๋ค, ๋ชจ๋ ํ๋ ฌ์ด 0์ ๊ฐ๊ฒ ํ๋ค.
ones(1ํ๋ ฌ) : ๋ชจ๋ ๊ฐ์ 1๋ก ์ด๊ธฐํํ๋ค, ๋ชจ๋ ํ๋ ฌ์ด 1์ ๊ฐ๊ฒ ํ๋ค.
zarr = np.zeros((3,10)) #3ํ 10์ด์ 0ํ๋ ฌ
zarr
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) -> ๋ฐ๊นฅ๊ดํธ 2๊ฐ : 2์ฐจ์
oarr = np.ones((3,10))
oarr
array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
5. arange(start, stop, step)
1) range vs arange
list(range(1,11)) #[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#range(-1.0, 10.5) : TypeError : 'float'
np.arange(1, 11) #array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
np.arange(-1.2, 5.5) #array([-1.2, -0.2, 0.8, 1.8, 2.8, 3.8, 4.8])
x์ ์์ด์ ๋ํ 2์ฐจ ๋ฐฉ์ ์
x = np.arange(-1.0, 2, 0.1)
x.size #30 (์์์ ๊ฐ์)
f(x) ํจ์
def f(x):
    """Evaluate the quadratic ``x**2 + 2*x + 3``.

    Only arithmetic operators are applied to ``x``, so it may be a plain
    number or (as in the surrounding notes) a NumPy array, in which case
    the result is computed element-wise.
    """
    return x**2 + 2*x + 3
ํจ์ ํธ์ถ
f(x) #y๋ฐํ
2์ฐจ ๋ฐฉ์ ์ ๊ทธ๋ํ
import matplotlib.pyplot as plt
plt.plot(x, f(x))  # line plot: x values on the x-axis, f(x) on the y-axis
plt.show()  # must be called; a bare `plt.show` only names the function and displays nothing
2) ์์ธ
zarr #0ํ๋ ฌ
# Fill the 3x10 zero matrix with a running counter, one element at a time,
# to demonstrate [row, column] indexing.
cnt = 0
for i in np.arange(3):  # row index
    for j in np.arange(10):  # column index
        cnt += 1
        zarr[i, j] = cnt
zarr
indexing
1์ฐจ์ : list ์์ธ ๋์ผ
2,3์ฐจ์ ์์ธ
์กฐ๊ฑด์ ์์ธ
import numpy as np
1. ์์ธ(indexing) : ์๋ฃ ์ฐธ์กฐ
1차원 : obj[index] : list 동일
2์ฐจ์ : obj[ํ ,์ด]
3์ฐจ์ : obj[๋ฉด, ํ, ์ด]
1) list ์์ธ
lst = list(range(6)) #0~5
lst #[0, 1, 2, 3, 4, 5]
lst[2] #2
lst[:3] #[0, 1, 2]
lst[3:] #[3, 4, 5]
lst[-1] #5
2) numpy ์์ธ
arr = np.arange(10)
arr #array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[2] #2
arr[:3] #array([0, 1, 2])
arr[3:] #array([3, 4, 5, 6, 7, 8, 9])
arr[-1] #9
2. slicing : ํน์ ๋ถ๋ถ์ ์๋ผ์ new object
arr_obj = arr[1:4]
arr_obj #array([1, 2, 3])
์ ์ฒด ์์ ์์
arr_obj[:] = 100
arr_obj #array([100, 100, 100])
์๋ณธ ๋ด์ฉ ํ์ธ
arr #array([ 0, 100, 100, 100, 4, 5, 6, 7, 8, 9])
* ํด๋น ๊ตฌ๊ฐ์ ์๋ณธ์์๋ ์์ ๋จ.
WHY? slicingํ ๋ ์๋ณธ์ ์ฃผ์๋ฅผ ๋ฐํํ๊ธฐ ๋๋ฌธ
๋ด์ฉ ๋ฐํ
arr_obj2 = arr[1:4].copy()
arr_obj2
arr_obj2[:] = 500
arr_obj2 #array([500, 500, 500])
์๋ณธ ๋ด์ฉ
arr #array([ 0, 100, 100, 100, 4, 5, 6, 7, 8, 9])
* list ์์ธ ๋ฌธ๋ฒ์ ๊ทธ๋๋ก ์ ์ฉํ๋ฉด ๋จ
3. ๊ณ ์ฐจ์ ์์ธ
1) 2์ฐจ์ ์์ธ
arr2d = np.array([[1,2,3], [4,5,6], [7,8,9]])
arr2d
array([[1, 2, 3], -> 1ํ
[4, 5, 6], -> 2ํ
[7, 8, 9]])-> 3ํ
arr2d.shape #(3, 3)
ํ index ๊ธฐ๋ณธ
arr2d[0] #arr2d[0, :]
arr2d[:2] #์ฐ์ 2ํ ์ ํ
arr2d[[0,2]] #๋น์ฐ์ 2ํ ์ ํ
arr2d[:, [0,2]] #ํ ์ ์ฒด ์ ํ, ๋น์ฐ์ 2์ด ์ ํ
arr2d[::2] #ํ์ํ ์ ํ [start:stop:step]
arr2d[:2,1:] #box์ ํ
2) 3์ฐจ์ ์์ธ
arr3d = np.array([ [[1,2,3], [4,5,6]], [[7,8,9], [10,11,12]] ]) #[]3๊ฐ ์ค์ฒฉ -> 3์ฐจ์
arr3d
array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
๋ฉด index ๊ธฐ๋ณธ
arr3d[0] #1๋ฉด์ ์์ธ
arr3d[1] #2면의 색인
arr3d[0,1] #1๋ฉด 2ํ -> array([4, 5, 6])
arr3d[0, 1, 2] #1๋ฉด 2ํ 3์ด -> 6
* 4์ฐจ์ : image = [size, h, w, c]
4. ์กฐ๊ฑด์ ์์ธ
dataset = np.random.randn(3,4)
dataset
dataset.shape #(3, 4)
0.7 ์ด์ ์์ ์ถ์ถ
dataset[dataset >= 0.7] #array([1.21270247, 0.82024283, 0.91840696, 1.71777067])
0.1 ~ 0.7 ์์ ์ถ์ถ : ๋ฒ์
dataset[dataset >= 0.1 and dataset <= 0.7] #ValueError
numpy 논리식 함수
np.logical_and() #논리곱
np.logical_or() #논리합
np.logical_not() #부정
np.logical_xor() #배타적 논리합
dataset[np.logical_and(dataset >= 0.1, dataset <= 0.7)] #array([0.12758051, 0.5333522 , 0.11015089])
pandas ๊ฐ์ฒด ์ ์ฉ
import pandas as pd
ser = pd.Series([3, 2, 5, 4, 1, 8])
ser[np.logical_and(ser >= 3, ser <=5)]
0 3
2 5
3 4
universal
๋ฒ์ฉํจ์(universal function)
numpy ์ผ๋ฐ์ ์ธ ์ํ/ํต๊ณ ๊ด๋ จ ํจ์
import numpy as np
1. numpy ์ ๊ณต ํจ์ : np.ํจ์(object)
object : list or ๊ธฐํ
list ๊ฐ์ฒด ์์ฒด๋ ์ํ/ํต๊ณ ํจ์ ์์
data = [1, 3, -5, 7] # list object
np.abs(data) # ์ ๋๊ฐ : array([1, 3, 5, 7])
np.sqrt(data) # ์ ๊ณฑ๊ทผ : array([1. , 1.73205081, nan, 2.64575131])
np.sqrt(np.abs(data)) # [1. , 1.73205081, 2.23606798, 2.64575131]
np.square(data) # ์ ๊ณฑ : [ 1, 9, 25, 49]
np.sign(data) # ๋ถํธ : [ 1, 1, -1, 1]
np.var(data) # ๋ถ์ฐ : 18.75
np.std(data) # ํ์คํธ์ฐจ : 4.330127018922
๋ก๊ทธ : ์๋งํ ๋ณํ - data ์ ๊ทํ : ์ผ์ ํ ๋ฒ์๋ก ์กฐ์
data2 = np.array([1, 2.5, 3.35, 4.6, 55.3])
data2
np.log(data2) # [0. , 0.91629073, 1.20896035, 1.5260563 , 4.01277291]
์ง์ : ๊ธ๊ฒฉํ ๋ณํ - sigmoid ํจ์
e = np.exp(1) # 2.71828182845904
np.exp(data2)# [2.71828183e+00, 1.21824940e+01, 2.85027336e+01, 9.94843156e+01, 1.03868737e+24]
๋ฐ์ฌ๋ฆผ ํจ์
np.ceil(data2) # [ 1., 3., 4., 5., 56.] - ํฐ ์ ์ ์ฌ๋ฆผ
np.rint(data2) # [ 1., 2., 3., 5., 55.] - ๊ฐ์ฅ ๊ฐ๊น์ด ์ ์ ์ฌ๋ฆผ
np.round(data2, 1) # [ 1. , 2.5, 3.4, 4.6, 55.3]
๊ฒฐ์ธก์น ์ฒ๋ฆฌ
data3 = np.array([1, 2.5, 3.35, 4.6, np.nan])
data3 # [1. , 2.5 , 3.35, 4.6 , nan]
np.isnan(data3) # [False, False, False, False, True]
๊ฒฐ์ธก์น ์ ์ธ : ์กฐ๊ฑด์
result = data3[np.logical_not(np.isnan(data3))] # True -> False
result # [1. , 2.5 , 3.35, 4.6 ]
data3[~np.isnan(data3)] # ๋ถ์ ๊ธฐํธ : ~
2. numpy ๊ฐ์ฒด ์ ๊ณต ํจ์ : object.ํจ์() = method
np.random.randn() - ํจํค์ง.๋ชจ๋.ํจ์()
data4 = np.random.randn(100, 400) # 2์ฐจ์ ํ์ค์ ๊ท๋ถํฌ ๋์ : N(0, 1^2)
data4
array([[ 0.70393722, 0.22611989, -0.26978612, -0.15859705],
[ 1.05361659, -0.10306144, 0.03118237, 0.22184977],
[-0.77531471, 1.33257045, -0.6020415 , -0.15902342]])
type(data4) # numpy.ndarray
data4.size # 12 -> 40000
data4.shape # (3, 4) -> (100, 400)
data4.sum() # 1.5014520523479282
data4.mean() # 0.12512100436232734 -> 0.0037392158490974635
data4.std() # 0.604996608366 -> 1.0006175982808039
data4.max() # 1.3325704466284878
data4.min() # -0.7753147073709586
method ๋ชฉ๋ก ํ์ธ
dir(data4)
3. axis ์์ฑ
data5 = np.random.randn(10, 20)
data5
ํ์ถ(axis=0) : ๊ฐ์ ์ด ๋ชจ์ = ์ด ๋จ์ ํต๊ณ
์ด์ถ(axis=1) : ๊ฐ์ ํ ๋ชจ์ = ํ ๋จ์ ํต๊ณ
์ ์ฒด ํ๊ท
data5.mean() # 0.03548252444038019
ํ์ถ ํ๊ท
data5.mean(axis = 0) # 1d(20)
์ด์ถ ํ๊ท
data5.mean(axis = 1) # 1d(10)
'๋ฐ์ดํฐ๋ถ์๊ฐ ๊ณผ์ > Python' ์นดํ ๊ณ ๋ฆฌ์ ๋ค๋ฅธ ๊ธ
DAY49. Python Statis Scipy (์นด์ด์ ๊ณฑ๊ฒ์ , T๊ฒ์ , ๊ณต๋ถ์ฐ, ํ๊ท๋ถ์) (0) | 2021.11.30 |
---|---|
DAY48. Python Numpy (2)reshape, ๋์, ํ๋ ฌ๊ณฑ (0) | 2021.11.29 |
DAY46. Python Group & Apply (0) | 2021.11.25 |
DAY45. Python Matplot (3)Seaborn (0) | 2021.11.23 |
DAY44. Python Matplot (2)PandasPlot (0) | 2021.11.22 |