DAY29. R 총정리 문제
총정리 연습문제
2012년 미국 대선 기부금 현황 데이터 셋
# Load the contribution data set into `election`.
# file.choose() opens an interactive file picker; select election_2012.csv.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
election <- read.csv(file.choose(), stringsAsFactors = FALSE)
dim(election)  # 1001731 16
str(election)  # dimensions plus the class of every column
<데이터 셋 설명> : 2012년 미국 대선 후보자('Romney, Mitt'와 'Obama, Barack') 후원금 현황
'data.frame': 1001731 obs. of 16 variables:
3. cand_nm : 대선 후보자 이름
4. contbr_nm : 후원자 이름
5. contbr_city : 후원 도시
9. contbr_occupation : 후원자 직업군
10. contb_receipt_amt : 후원금
11. contb_receipt_dt : 후원 날짜
chapter 01 : 자료형, 형변환(날짜 변환)
[문제1] election 데이터 셋의 변수를 대상으로 자료형을 확인하고 자료형을 변경하시오.
소요시간 : 5분
1) cand_nm, contb_receipt_amt, contb_receipt_dt 변수의 자료형 확인하기
힌트) mode() 이용
# Inspect the storage mode of the three columns used in this exercise.
mode(election[["cand_nm"]])            # "character"
mode(election[["contb_receipt_amt"]])  # "numeric"
mode(election[["contb_receipt_dt"]])   # "character"
2) 후원 날짜(contb_receipt_dt) 변수를 날짜형으로 변환하기
# Convert the contribution date column from text to a date-time class.
date <- election$contb_receipt_dt
date[1:10]  # "20-Jun-11" "23-Jun-11" -> US style: day-month-year

# Sys.Date() takes no arguments, so it cannot convert a vector. The original
# call is kept only as a comment because running it raises an error, which
# stops the script when it is source()d.
# Sys.Date(data)  # Error in Sys.Date(data) : unused argument (data)

# The "%b" month abbreviation is matched against the current LC_TIME locale,
# so English abbreviations such as "Jun" may not parse under a Korean locale.
# Only LC_TIME is switched, to the portable "C" locale (English month names),
# and the previous value is remembered so it can be restored afterwards.
# This avoids the Windows-only names 'English_USA' / 'Korean_Korea' and does
# not assume the session started in a Korean locale.
Sys.getlocale()  # e.g. "LC_COLLATE=Korean_Korea..."
old_lc_time <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")

# US style day-month-year -> year-month-day
kdate <- strptime(date, "%d-%b-%y")
kdate[1:10]

# Put the user's original LC_TIME setting back.
Sys.setlocale("LC_TIME", old_lc_time)

# Replace the text column with the parsed dates.
# NOTE(review): strptime() returns POSIXlt. The column class is left unchanged
# here because later code may rely on it; consider as.Date(kdate) if nothing
# downstream needs POSIXlt components.
election$contb_receipt_dt <- kdate
chapter 02 : 색인(index), 칼럼명 변경
[문제2] election 데이터 셋을 대상으로 6개 칼럼(데이터 셋 설명 참조)만 선택하여 새로운 데이터 셋을 만드시오.
소요시간 : 3분
1) 색인(index) 이용하기 : 힌트) dataset[, c(열index1, 열index2, ...)]
# Keep only columns 3, 4, 5, 9, 10 and 11 of `election`, selected by position.
election_df <- election[, c(seq(3, 5), seq(9, 11))]
dim(election_df)  # 1001731 6
2) election_df 칼럼명 변경하기 : 힌트) names(dataset) <- c('칼럼명1','칼럼명2', ...)
수정 칼럼명 : 'cand_name','contbr_name','city','occupation','receipt_amt','receipt_date'
# Show the column names before renaming.
colnames(election_df)
# Assign the six shorter column names, in column order.
colnames(election_df) <- c(
  'cand_name',
  'contbr_name',
  'city',
  'occupation',
  'receipt_amt',
  'receipt_date'
)
# Show the column names after renaming.
colnames(election_df)
chapter 03 : 서브셋(subset) 만들기
[문제3] 'Romney, Mitt'와 'Obama, Barack' 대통령 후보자별로 서브셋(subset)을 생성하시오.
소요시간 : 6분
1) 대선 후보자 이름(cand_name)을 대상으로 중복되지 않은 후보자 이름과 각 후보자별 빈도수 확인하기
힌트) unique() : 유일값 확인, table() : 빈도수 확인
# Distinct candidate names: 13 candidates, among them
# "Romney, Mitt" and "Obama, Barack".
unique(election_df[["cand_name"]])
# Number of rows per candidate.
table(election_df[["cand_name"]])
2) 'Romney, Mitt'와 'Obama, Barack' 대통령 후보자별로 서브셋 만들기
힌트) subset(dataset, subset = 조건식)
# One data frame per candidate. which() keeps only rows where the comparison
# is TRUE, so rows with a missing cand_name are dropped, as subset() does.
romney <- election_df[which(election_df$cand_name == "Romney, Mitt"), , drop = FALSE]  # 'Romney, Mitt'
obama <- election_df[which(election_df$cand_name == "Obama, Barack"), , drop = FALSE]  # 'Obama, Barack'
# Check dimensions of each subset.
dim(romney)  # 107229 6
dim(obama)   # 593746 6
# Check contents: first and last six rows of each subset.
head(romney, n = 6L)
tail(romney, n = 6L)
head(obama, n = 6L)
tail(obama, n = 6L)