์ •ํ˜•VS๋น„์ •ํ˜•

๋น„์ •ํ˜• ex) text > ์ „์ฒ˜๋ฆฌ (๋ถˆ์šฉ์–ด ์ •์ œ ๋“ฑ) > Rํ†ต๊ณ„ ํŒจํ‚ค์ง€ > ์ถ”๋ก ํ†ต๊ณ„, ํŒจํ„ด, ๊ทœ์น™

๋ฐ˜์ •ํ˜• ex) HTML > ์ „์ฒ˜๋ฆฌ> Rํ†ต๊ณ„ ํŒจํ‚ค์ง€ > ์ถ”๋ก ํ†ต๊ณ„, ํŒจํ„ด, ๊ทœ์น™

์ •ํ˜• ex) DB> Rํ†ต๊ณ„ ํŒจํ‚ค์ง€ > ์ถ”๋ก ํ†ต๊ณ„, ํŒจํ„ด, ๊ทœ์น™

 

 

 

 

 

์ •ํ˜•๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ

1. JDK & JRE ์„ค์น˜ : RJDBC ํŒจํ‚ค์ง€๋ฅผ ์‚ฌ์šฉํ•˜๊ธฐ ์œ„ํ•ด์„œ๋Š” java ์„ค์น˜
C:\ITWILL\2_Rwork\tools\jdk-8u151-windows-x64.exe ์„ค์น˜ํŒŒ์ผ ์ด์šฉ 

2. ํŒจํ‚ค์ง€ ์„ค์น˜

install.packages("rJava") # R์—์„œ JAVA ์‚ฌ์šฉ ์ง€์› 
install.packages("DBI") #DB Interface : SELECT๋ฌธ
install.packages("RJDBC") #R+DB์—ฐ๋™ (rJava์˜์กด์„ฑ)


3. ํŒจํ‚ค์ง€ ๋กœ๋”ฉ : ์ˆœ์„œ๋ฅผ ์ง€์ผœ in memory์‹œ์ผœ์•ผ ํ•œ๋‹ค

library(DBI)
Sys.setenv(JAVA_HOME='C:\\Program Files\\Java\\jdk1.8.0_151') # JAVA ์„ค์น˜ ๊ฒฝ๋กœ
library(rJava) 
library(RJDBC) # rJava์— ์˜์กด์ 


4. Oracle ์—ฐ๋™ (Oracle 11g)
๋‹จ๊ณ„1: driver ๊ฐ์ฒด (ํ”„๋กœ๊ทธ๋žจex.R,Pyton..๊ณผ DB๋ฅผ ์—ฐ๊ฒฐ)

drv<-JDBC(driverClass="oracle.jdbc.driver.OracleDriver", 
          classPath="C:\\oraclexe\\app\\oracle\\product\\11.2.0\\server\\jdbc\\lib\\ojdbc6.jar")


๋‹จ๊ณ„2 : db์—ฐ๋™(driver, url, user, password)
1๋‹จ๊ณ„์—์„œ ์—ฐ๋™ํ•œ drv๋ฅผ ์—ฐ๊ฒฐ

conn <- dbConnect(drv=drv, 
                  url="jdbc:oracle:thin:@//127.0.0.1:1521/xe", #127~ ํฌํŠธ๋ฒˆํ˜ธ/DB์ด๋ฆ„
                  user="scott", 
                  password="tiger")


1.์ „์ฒด ํ…Œ์ด๋ธ” ์กฐํšŒ

query = 'select*from tab'
dbGetQuery(conn, query) #DBI์†Œ์† ํŒจํ‚ค์ง€


2. table ์ƒ์„ฑ : DB(XE)์— ์ €์žฅ๋จ

query = "create table db_test(sid int, pwd char(4), name varchar(25), age int)"
mode(query) #"character":๋ฌธ์ž์—ด

dbSendUpdate(conn, query) #RJDBC์†Œ์† ํŒจํ‚ค์ง€. table์ƒ์„ฑ ํ•จ์ˆ˜
dbGetQuery(conn, "select*from tab") #table์กฐํšŒ
dbGetQuery(conn, "select*from emp") #table์˜ ๊ตฌ์กฐ ํ™•์ธ


3. DB๋‚ด์šฉ ์ˆ˜์ •

: dbSendUpdateํ•จ์ˆ˜(conn, query(=create/insert/update/delete))


1) INSERT๋ฌธ (๋ ˆ์ฝ”๋“œ ์ถ”๊ฐ€)

query = "INSERT INTO DB_TEST values(1001, '1234', 'ํ™๊ธธ๋™', 35)" #''์™€ ""๋ชจ๋‘ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•˜์ง€๋งŒ ''์˜ ๊ฒฝ์šฐ '๋ฌธ์ž์—ด'๊ณผ ํ˜ผ์šฉ๋  ์ˆ˜ ์žˆ์Œ.
dbSendUpdate(conn, query) #dbํ•จ์ˆ˜๋ฅผ ์จ์•ผ ๋ฐ˜์˜๋จ
dbGetQuery(conn, "select*from DB_TEST") #์ถ”๊ฐ€ํ•œ ๋ ˆ์ฝ”๋“œ ํ™•์ธ


2) UPDATE๋ฌธ

query = "UPDATE DB_TEST set name='๊น€๊ธธ๋™' WHERE sid = 1001"
dbSendUpdate(conn, query)
dbGetQuery(conn, "select*from DB_TEST")


3) DELETE๋ฌธ

query = "DELETE FROM DB_TEST where sid=1001"
dbSendUpdate(conn, query)
dbGetQuery(conn, "select*from DB_TEST")


4) table์‚ญ์ œ

dbSendUpdate(conn, "DROP TABLE DB_TEST PURGE")
dbGetQuery(conn, "select*from tab")


5. table๋ถˆ๋Ÿฌ์˜ค๊ธฐ

emp = dbGetQuery(conn, "select*from emp") #R์˜ ๊ฐ์ฒด๋กœ ์ €์žฅ
str(emp) #data.frame์œผ๋กœ ์ƒ์„ฑ. num์ˆซ์žํ˜• chr๋ฌธ์žํ˜•

 

 





1) ํ†ต๊ณ„ํ•จ์ˆ˜ : ๋น„์œจ/๋“ฑ๊ฐ„ ์ฒ™๋„

mean(emp$SAL) #2073.214
mean(emp$COMM) #NA
mean(emp$COMM, na.rm=T) #๊ฒฐ์ธก์น˜๋ฅผ ์ œ์™ธํ•œ ๊ฐ’๋งŒ ๊ตฌํ•˜๊ธฐ. 550
summary(emp)


2) ์กฐ๊ฑด ์กฐํšŒ

query = "select * from emp where sal>=2500 and job='MANAGER'" #๋…ผ๋ฆฌ์—ฐ์‚ฐ์ž and ์‚ฌ์šฉ. '๋ฌธ์ž์—ด'์€ ๋Œ€์†Œ๋ฌธ์ž ๊ตฌ๋ถ„
manager_2500 = dbGetQuery(conn, query)
manager_2500

professor : ์ •๊ต์ˆ˜
name_pay = dbGetQuery(conn, "select name, pay from professor
                      where position='์ •๊ต์ˆ˜'")
str(name_pay)
name_pay

barplot(name_pay$PAY, col='green', names.arg = name_pay$NAME) #names.arg:x์ถ• ๋ˆˆ๊ธˆ๋งˆ๋‹ค ๊ต์ˆ˜๋ช… ๋ถ™์ด๊ธฐ


3) subquery ์ด์šฉํ•ด์„œ ํŠน์ • ๋ถ€์„œ์˜ ์‚ฌ์›์ •๋ณด ์ถœ๋ ฅ (SQL์—์„œ์™€ ๋™์ผํ•˜๊ฒŒ ์‚ฌ์šฉ)
* sub : dept, main : emp

dbGetQuery(conn, 'select * from dept')

query = "select ename, sal, job from emp
where deptno= 
(select deptno from dept where dname='SALES')"
sales = dbGetQuery(conn, query)
sales


4) join : inner join

query = "select e.ename, e.job, d.*
from emp e, dept d
where e.deptno=d.deptno and e.ename like'%M%'"

 

* ์–‘์ชฝ ํ…Œ์ด๋ธ”์—์„œ ๋ถ€์„œ๋ฒˆํ˜ธ๊ฐ€ ๊ฐ™์€ (์กฐ์ธ์กฐ๊ฑด)
* and์ดํ•˜ : ์ด๋ฆ„์— M์ด ํฌํ•จ๋˜์–ด ์žˆ๋Š” ์‚ฌ์› (์ผ๋ฐ˜์กฐ๊ฑด๊ฑด)

join_df = dbGetQuery(conn, query)
join_df #R์˜ ๊ฐ์ฒด


5) GROUP BY : ๊ฐ ๋ถ€์„œ๋ณ„ ๊ธ‰์—ฌ ํ‰๊ท , ํ•ฉ๊ณ„ ๊ตฌํ•˜๊ธฐ

query = "select deptno, avg(sal) avg_sal, sum(sal) tot_sal
from emp
group by deptno
order by deptno"

dept_group = dbGetQuery(conn, query)
dept_group

* select์ ˆ์—์„œ ๋ณ„์นญ ์„ค์ • ๊ฐ€๋Šฅ


DB ์—ฐ๊ฒฐ ์ข…๋ฃŒ

dbDisconnect(conn) #TRUE ์ข…๋ฃŒ

 

 

 

 

 

 

์—ฐ์Šต๋ฌธ์ œ

01. ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๋‹จ๊ณ„๋ฅผ ํ†ตํ•ด์„œ ํ…Œ์ด๋ธ”์„ ํ˜ธ์ถœํ•˜๊ณ , SQL๋ฌธ์„ ์ด์šฉํ•˜์—ฌ ๋ ˆ์ฝ”๋“œ๋ฅผ ์กฐํšŒํ•˜์‹œ์˜ค.
[๋‹จ๊ณ„ 1] ์‚ฌ์›ํ…Œ์ด๋ธ”(EMP)์„ ๊ฒ€์ƒ‰ํ•˜์—ฌ ๊ฒฐ๊ณผ๋ฅผ EMP_DF ๋ณ€์ˆ˜๋กœ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
1) ์ „์ฒด ๋ ˆ์ฝ”๋“œ ์กฐํšŒ

EMP_DF <- dbGetQuery(conn, "select * from EMP")  
EMP_DF


2) ๋˜๋Š” ๋ถ€์„œ๋ณ„ ์กฐํšŒ 

EMP_DF2 <- dbGetQuery(conn, "select deptno, avg(sal) avg_sal 
                             from EMP  
                             group by deptno
                             order by deptno")
                             
EMP_DF2
#    DEPTNO  AVG_SAL
#1     10 2916.667
#2     20 2175.000
#3     30 1566.667


๋‹จ๊ณ„ 2] EMP_DF ๋ณ€์ˆ˜๋ฅผ ๋Œ€์ƒ์œผ๋กœ ๋ถ€์„œ๋ณ„ ๊ธ‰์—ฌ์˜ ํ‰๊ท ์„ ๋ง‰๋Œ€์ฐจํŠธ๋กœ ์‹œ๊ฐํ™”
1) ์ „์ฒด ๋ ˆ์ฝ”๋“œ ์กฐํšŒ
๊ฐ ๋ถ€์„œ๋ณ„ SUBSET 

dept10 <- subset(EMP_DF, DEPTNO == 10)
dept20 <- subset(EMP_DF, DEPTNO == 20)
dept30 <- subset(EMP_DF, DEPTNO == 30)


๊ฐ ๋ถ€์„œ๋ณ„ ๊ธ‰์—ฌ ํ‰๊ท  

dept10_sal <- mean(dept10$SAL)
dept20_sal <- mean(dept20$SAL)
dept30_sal <- mean(dept30$SAL)


[๋‹จ๊ณ„ 3] ๋ง‰๋Œ€์ฐจํŠธ๋ฅผ ๋Œ€์ƒ์œผ๋กœ X์ถ•์˜ ์ถ•๋ˆˆ๊ธˆ์„ ๋ถ€์„œ๋ช…์œผ๋กœ ํ‘œ์‹œํ•˜๊ธฐ
1) ์ „์ฒด ๋ ˆ์ฝ”๋“œ ์กฐํšŒ

dept_sal_avg <- c(dept10_sal, dept20_sal, dept30_sal)


X์ถ• ๋ˆˆ๊ธˆ : ๋ถ€์„œ ์ด๋ฆ„ ํ‘œ์‹œ 

bp <- barplot(dept_sal_avg, col = rainbow(3),
        names.arg = c('10', '20', '30'), ylim = c(0, 3500))

text(x=bp, y=dept_sal_avg+80, round(dept_sal_avg, 3))


2) ๋ถ€์„œ๋ณ„ ์กฐํšŒ ๊ฒฝ์šฐ

DEPT <- dbGetQuery(conn, "select * from DEPT")


table join : ๋ถ€์„œ๋ช…์œผ๋กœ ํ‘œ์‹œ 

library(plyr)
join_df <- join(EMP_DF2, DEPT, by='DEPTNO')
join_df

bp <- barplot(join_df$AVG_SAL, col = rainbow(3), 
              names.arg = join_df$DNAME, ylim = c(0, 3500))

text(x=bp, y=EMP_DF2$AVG_SAL+80, round(EMP_DF2$AVG_SAL, 3))

 



02. 'WARD' ์‚ฌ์›์˜ ์ƒ์‚ฌ(MGR)์™€ ๋™์ผํ•œ ์‚ฌ์›์ด๋ฆ„, ์ง์ฑ…, ์—ฐ๋ด‰์„ ์ถœ๋ ฅํ•˜์‹œ์˜ค.
<์กฐ๊ฑด1> ์„œ๋ธŒ์ฟผ๋ฆฌ ์ด์šฉ

query <- "select ename, job, sal from EMP
          where MGR = (select MGR from EMP where ename = 'WARD')"
result <- dbGetQuery(conn, query)


<์กฐ๊ฑด2> ์ฐจํŠธ ๊ฒฐ๊ณผ ์ฐธ๊ณ  

result
pie(result$SAL, labels = result$ENAME, col = rainbow(5),
    main = "WARD ์‚ฌ์›์˜ ์ƒ์‚ฌ์™€ ๊ฐ™์€ ์‚ฌ์›๋“ค์˜ ๊ธ‰์—ฌ")











+ Recent posts