DataBaser.Net: R그래픽스-유용한 예제들

1 3d surface
2 magick
3 그룹별 분포를 3차원스럽게 보이기(ggjoy)
4 ggplot x축 비틀기
5 잔차 plot
6 버블차트(bubble chart)
7 area chart 아니고, 범위(상한~하한) 영역 칠하기
8 이중축
9 abline, legend
10 인터랙티브 플롯팅
11 xlab, ylab, title 조정
12 text
13 parallel plot?
14 범례의 그림크기 조절
15 x축 라벨바꾸기
16 x축 날짜형식
17 ggplot2::stat_density2d()
18 boxplot.2d
19 기본 scatter plot
20 마우스로 클릭하여 값 알아내기
21 smoothScatter
22 ggpairs
23 pairs(커스터마이징된)
24 Spider(Radar) Chart

1 3d surface #

https://stackoverflow.com/questions/41700400/smoothing-3d-plot-in-r

library(mgcv)

x<- rnorm(200)
y<- rnorm(200)
z<-rnorm(200)

tab<-data.frame(x,y,z)
tab

#surface wireframe:

mod <- gam(z ~ te(x, y), data = tab)

library(rgl)
library(deldir)

zfit <- fitted(mod)
col <- cm.colors(20)[1 + 
         round(19*(zfit - min(zfit))/diff(range(zfit)))]

persp3d(deldir(x, y, z = zfit), col = col)
aspect3d(1, 2, 1)

[edit]

2 magick #

이미지 처리 패키지

https://cran.r-project.org/web/packages/magick/vignettes/intro.html

[edit]

3 그룹별 분포를 3차원스럽게 보이기(ggjoy) #

대략 이런 모양

--출처: http://rpubs.com/ianrmcdonald/293304

더 이쁘게..

https://www.r-bloggers.com/joyplot-for-gsea-result/

[edit]

4 ggplot x축 비틀기 #

ggplot(df1, aes(x=grp, y=val)) + geom_boxplot(outlier.shape = NA) + ylim(0, 30) + theme(axis.text.x = element_text(angle = 90, hjust = 1))

45도 비틀기: theme(axis.text.x = element_text(angle = 45, hjust = 1))
90도 비틀기: theme(axis.text.x = element_text(angle = 90, hjust = 1))

[edit]

5 잔차 plot #

--Practical Data Science with R, ISBN 978-1-61729-156-2

d <- data.frame(y=(1:10)^2,x=1:10)
model <- lm(y~x,data=d)
d$prediction <- predict(model,newdata=d)

library('ggplot2')
ggplot(data=d) + geom_point(aes(x=x,y=y)) +
geom_line(aes(x=x,y=prediction),color='blue') +
geom_segment(aes(x=x,y=prediction,yend=y,xend=x)) +
scale_y_continuous('')

[edit]

6 버블차트(bubble chart) #

dfx = data.frame(ev1=1:10, ev2=sample(10:99, 10), ev3=10:1)
symbols(x=dfx$ev1, y=dfx$ev2, circles=dfx$ev3, inches=1/3, ann=F, bg="steelblue2", fg=NULL)

[edit]

7 area chart 아니고, 범위(상한~하한) 영역 칠하기 #

polygon(c(x, rev(x)), c(y$upr, rev(y$lwr)), col = "gray", border = NA)

[edit]

8 이중축 #

plotrix 패키지 참고 및 출처 --> http://rpubs.com/cardiomoon/19042

library("plotrix")
going_up <- seq(3, 7, by = 0.5) + rnorm(9)
going_down <- rev(60:74) + rnorm(15)

twoord.plot(2:10, going_up, 1:15, going_down, xlab = "Sequence", ylab = "Ascending values", 
            rylab = "Descending values", lcol = 4, main = "Plot with two ordinates - points and lines", 
            do.first = "plot_bg();grid(col=\"white\",lty=1)")

twoord.plot(2:10, going_up, 1:15, going_down, xlab = "Sequence", lylim = range(going_up) + 
              c(-1, 10), rylim = range(going_down) + c(-10, 2), ylab = "Ascending values", 
            ylab.at = 5, rylab = "Descending values", rylab.at = 65, lcol = 4, main = "Plot with two ordinates - separated lines", 
            lytickpos = 3:7, rytickpos = seq(55, 75, by = 5), do.first = "plot_bg();grid(col=\"white\",lty=1)")

다른 예제

set.seed(2015-04-13)

d = data.frame(x =seq(1,10),
           n = c(0,0,1,2,3,4,4,5,6,6),
           logp = signif(-log10(runif(10)), 2))

#좌
par(mar = c(5,5,2,5))
with(d, plot(x, logp, type="l", col="red3", 
             ylab=expression(-log[10](italic(p))),
             ylim=c(0,3)))

#우
par(new = T)
with(d, plot(x, n, pch=16, axes=F, xlab=NA, ylab=NA, cex=1.2))
axis(side = 4)
mtext(side = 4, line = 3, 'Number genes selected')
legend("topleft",
       legend=c(expression(-log[10](italic(p))), "N genes"),
       lty=c(1,0), pch=c(NA, 16), col=c("red3", "black"))

--출처: http://www.r-bloggers.com/r-single-plot-with-two-different-y-axes/

[edit]

9 abline, legend #

library(party)
library(caret)
gtree <- ctree(Species ~ ., data = iris)
plot(gtree)

attach(iris)
colour <- c("black", "red", "blue")
plot(Petal.Width, Petal.Length, pch=20, col=c(colour[Species]))
abline(h=4.8, col="blue", lty=2);text(0.75,5,"Petal.Length > 4.8", col="blue")
abline(h=1.9, col="black", lty=2);text(1,2.2,"Petal.Length > 1.9", col="black")
abline(v=1.7, col="red", lty=2);text(2,4,"Petal.Width > 1.7", col="red")
legend(0.1, 6.5, c("setosa","versicolor", "virginica"), pch=20, col=colour)

[edit]

10 인터랙티브 플롯팅 #

Interactive Plotting with Manipulate

[edit]

11 xlab, ylab, title 조정 #

for(i in 1:max(cl$cluster)){
  p_cluster <- ggplot(tmp[tmp$cluster == i, ], aes(x=variable, y=value, colour=factor(cluster))) 
  p_cluster + geom_line() + ggtitle(paste0("cluster", i))+theme(axis.text=element_text(size=20),
  axis.title=element_text(size=20,face="bold"),plot.title=element_text(family="Times", face="bold", size=20))
  ggsave(file=paste0("c:\\plot\\cluster", i, ".png"))  
}

[edit]

12 text #

prior <- seq(0, 1, 0.1)
posterior <- (piror*1/2)/(piror*1/2+(1-piror)*0.09)
plot(prior, posterior, main="친자확인소송", type="l")
abline(0,1)
abline(h=0.5, v=0.5)
point_label <- paste0("(", prior,", ", round(posterior, 3), ")")
text(prior, posterior, point_label, cex=0.8, pos=4, col="red")

[edit]

13 parallel plot? #

[edit]

14 범례의 그림크기 조절 #

p<-ggplot(df, aes(x=일자, y=게임수, colour=장르)) + geom_point(size=4) + facet_wrap( ~ 국가, nrow=3)
p + theme(
        strip.text.x = element_text(size=20), 
        legend.title = element_text(size=20, face="bold"), 
        legend.text = element_text(size = 20, face = "bold")) + 
    guides(size=10,colour = guide_legend(override.aes = list(size=7)))

[edit]

15 x축 라벨바꾸기 #

plot(tmp1$stage_no, tmp1$md, axes = FALSE, xlab="스테이지", ylab="정체판수")
axis(1, tmp1$stage_no,tmp1$stage_nm, las=2) #las=2는 라벨을 세로로
axis(2);box()

[edit]

16 x축 날짜형식 #

library(ggplot2)
library("scales") #date_format()

ggplot(loess.df, aes(x=dt, y=s)) + geom_point() + geom_smooth() + 
    labs(x = "일자", y = "일 시장규모(억)") +
    scale_x_date(labels = date_format("%Y-%m"))

#xaxt='n' 옵션을 줘야한다.
plot(df$dt, df$amt, type="l", xaxt='n')
axis.Date(side=1, df$dt, format = "%Y-%m-%d")

#strptime(x1, "%Y-%m-%d %H:%M:%OS")

[edit]

17 ggplot2::stat_density2d() #

2차원 그래프로 산점도로는 잘 보이지 않는 데이터들의 그룹을 식별하기 좋다.

rs <- data.frame()
for(i in 1:4){
    x <- c(rnorm(200,0,i),rnorm(200,4,i))
    y <- c(rnorm(200,0,i),rnorm(200,4,i))
    grp <- replicate(length(x), i)
    rs <- rbind(rs, data.frame(grp, x, y))
}

library(ggplot2)
p <- ggplot(rs, aes(x=x, y=y)) 
p + stat_density2d() + facet_wrap( ~ grp, nrow=2)

[edit]

18 boxplot.2d #

#install.packages("hdrcde")
library("hdrcde")
 
x <- c(rnorm(200,0,1),rnorm(200,4,1))
y <- c(rnorm(200,0,1),rnorm(200,4,1))

par(mfrow=c(2,2))
plot(x,y, pch="+", cex=.5)
hdr.boxplot.2d(x,y)
plot(hdr.2d(x, y), pointcol="red", show.points=TRUE, pch=3)
par(mfrow=c(1,1))

[edit]

19 기본 scatter plot #

colour <- c("red", "blue", "black")
plot(iris$Sepal.Length, iris$Sepal.Width, col=c(colour[iris$Species]))

[edit]

20 마우스로 클릭하여 값 알아내기 #

plot(dau ~ dau_pred, data=grossing)

identify() 함수를 사용하면 알고자 하는 값들을 클릭 후 esc를 누르면 차트에 클릭된 곳의 값을 인식한다.

#rowname()이 차트에 찍힌다.
identify(grossing$dau_pred, grossing$dau, labels=row.names(grossing)) 

#x,y값이 찍힌다. 물론 esc를 눌렀을 때에 순서대로 rowname도 출력된다.
identify(grossing$dau_pred, grossing$dau, labels=paste0("x=",grossing$dau_pred, "\ny=", grossing$dau))

아래의 코드는 identify() 함수와 달리 클릭하자마자 값을 알 수 있다.

repeat {
  click.loc <- locator(1)
  if(!is.null(click.loc)){
      text( x = click.loc$x, y = click.loc$y, 
            labels = paste0("x=",round(click.loc$x,0), "\ny=", round(click.loc$y)), 
            cex = 0.8, col = "blue" )
  } 
  else break
}

[edit]

21 smoothScatter #

일반적으로 산점도(scatter plot)를 그리면 다음과 같다.

plot(x,y)

그런데, 데이터의 양이 많으면 위와 같이 데이터의 분포를 파악하기 어렵다. smoothScatter()는 이런 어려움을 극복할 수 있게 시각화 해준다. 아래의 그림을 보면 3개 정도의 군집이 있는 것을 확인 할 수 있다.

library(graphics)
smoothScatter(x, y)

[edit]

22 ggpairs #

pairs()함수는 data.frame에 대해 산점도를 그려준다. 이 자체로도 정보가 많지만, 더욱 많은 정보를 줄 수 있는 산점도도 있다. 그게 ggpairs()다. 단점은 느리다는 것.

library("GGally")
ggpairs(iris)

[edit]

23 pairs(커스터마이징된) #

panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) {
    usr <- par("usr")
    on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y, use="complete.obs"))
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex =  cex.cor * (1 + r) / 2)
}
    
panel.hist <- function(x, ...) {
  usr <- par("usr")
  on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  opar=par(ps=30) #font-size
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col="white", ...)
}

panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
                            cex = 1, col.smooth = "black", ...) {
    points(x, y, pch = pch, col = col, bg = bg, cex = cex)
    abline(stats::lm(y ~ x),  col = col.smooth, ...)
}

pairs(iris, pch=".",
                   upper.panel = panel.cor,
                   diag.panel  = panel.hist,
                   lower.panel = panel.lm)


#또는 비모수
panel.lowess <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
                            cex = 1, col.smooth = "black", ...) {
    points(x, y, pch = pch, col = col, bg = bg, cex = cex)
    lines(lowess(x,y))
}

pairs(iris, pch=".",
                   upper.panel = panel.cor,
                   diag.panel  = panel.hist,
                   lower.panel = panel.lowess)

[edit]

24 Spider(Radar) Chart #

tmp <- df[df$job_id == i, 4:7]
title <- head(df[df$job_id == i, 2],1)
tmp <- rbind(rep(12000,4) , rep(0,4), tmp) #max, min값을 세팅해야 함.
rownames(tmp) <- c("1", "2", "Active", "Churn")
colors_border <- c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) )
colors_in <- c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4))
#radarchart(tmp, axistype=1, pcol=colors_border, plwd=2, plty=1, axislabcol="grey", cglcol="gray", caxislabels=seq(0,12000,2000), cglwd=0.8, vlcex=0.8, seg=4, title=title)
radarchart(tmp, axistype=1, pcol=colors_border, plwd=2, plty=1, axislabcol="grey", cglcol="gray", caxislabels=seq(0,12000,2000), cglwd=0.8, vlcex=0.8, seg=4, title="")
legend(x=0.7, y=1, legend = rownames(tmp[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "black", cex=1.2, pt.cex=3)

대박 정보네요. 감사합니다. -- shanmdphd 2017-05-25 14:42:48

Contents

1 3d surface #

2 magick #

3 그룹별 분포를 3차원스럽게 보이기(ggjoy) #

4 ggplot x축 비틀기 #

5 잔차 plot #

6 버블차트(bubble chart) #

7 area chart 아니고, 범위(상한~하한) 영역 칠하기 #

8 이중축 #

9 abline, legend #

10 인터랙티브 플롯팅 #

11 xlab, ylab, title 조정 #

12 text #

13 parallel plot? #

14 범례의 그림크기 조절 #

15 x축 라벨바꾸기 #

16 x축 날짜형식 #

17 ggplot2::stat_density2d() #

18 boxplot.2d #

19 기본 scatter plot #

20 마우스로 클릭하여 값 알아내기 #

21 smoothScatter #

22 ggpairs #

23 pairs(커스터마이징된) #

24 Spider(Radar) Chart #