Welcome to the OGeek Q&A Community for programmers and developers - Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
774 views
in Technique[技术] by (71.8m points)

r - Plotting Partial Least Squares Regression (plsr) biplot with ggplot2

Using the data.frame below (Source: http://eric.univ-lyon2.fr/~ricco/tanagra/fichiers/en_Tanagra_PLSR_Software_Comparison.pdf)

Data

# Car data set used throughout: 20 rows x 14 whitespace-separated columns.
# The leading blank line inside the string is skipped by read.table(), so the
# first non-blank line supplies the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; the redundant c() around the single string is dropped.
df <- read.table(text = "
diesel  twodoors    sportsstyle wheelbase   length  width   height  curbweight  enginesize  horsepower  horse_per_weight    conscity    price   symboling
0   1   0   97  172 66  56  2209    109 85  0.0385  8.7 7975    2
0   0   0   100 177 66  54  2337    109 102 0.0436  9.8 13950   2
0   0   0   116 203 72  57  3740    234 155 0.0414  14.7    34184   -1
0   1   1   103 184 68  52  3016    171 161 0.0534  12.4    15998   3
0   0   0   101 177 65  54  2765    164 121 0.0438  11.2    21105   0
0   1   0   90  169 65  52  2756    194 207 0.0751  13.8    34028   3
1   0   0   105 175 66  54  2700    134 72  0.0267  7.6 18344   0
0   0   0   108 187 68  57  3020    120 97  0.0321  12.4    11900   0
0   0   1   94  157 64  51  1967    90  68  0.0346  7.6 6229    1
0   1   0   95  169 64  53  2265    98  112 0.0494  9.0 9298    1
1   0   0   96  166 64  53  2275    110 56  0.0246  6.9 7898    0
0   1   0   100 177 66  53  2507    136 110 0.0439  12.4    15250   2
0   1   1   94  157 64  51  1876    90  68  0.0362  6.4 5572    1
0   0   0   95  170 64  54  2024    97  69  0.0341  7.6 7349    1
0   1   1   95  171 66  52  2823    152 154 0.0546  12.4    16500   1
0   0   0   103 175 65  60  2535    122 88  0.0347  9.8 8921    -1
0   0   0   113 200 70  53  4066    258 176 0.0433  15.7    32250   0
0   0   0   95  165 64  55  1938    97  69  0.0356  7.6 6849    1
1   0   0   97  172 66  56  2319    97  68  0.0293  6.4 9495    2
0   0   0   97  172 66  56  2275    109 85  0.0374  8.7 8495    2", header = TRUE)

and this

Code

library(pls)
# Response: column 14 of df (symboling); predictors: the first 11 columns
# (diesel ... horse_per_weight). Both are passed to mvr() as matrices.
Y <- as.matrix(df[, 14])
X <- as.matrix(df[, 1:11])
# Three-component fit with scaled predictors. `TRUE` is spelled out because
# `T` is an ordinary, reassignable variable.
df.pls <- mvr(Y ~ X, ncomp = 3, method = "oscorespls", scale = TRUE)
# Base-graphics biplot from the pls package (the figure to reproduce).
plot(df.pls, plottype = "biplot")

I got this

Biplot

enter image description here

How can I draw this PLS biplot with ggplot2? Any help would be appreciated.

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)
#Read data
# 20 rows x 14 whitespace-separated columns. The leading blank line and the
# indentation of the data rows are ignored by read.table().
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; the redundant c() around the single string is dropped.
df <- read.table(text = "
diesel  twodoors    sportsstyle wheelbase   length  width   height  curbweight  enginesize  horsepower  horse_per_weight    conscity    price   symboling
                          0   1   0   97  172 66  56  2209    109 85  0.0385  8.7 7975    2
                          0   0   0   100 177 66  54  2337    109 102 0.0436  9.8 13950   2
                          0   0   0   116 203 72  57  3740    234 155 0.0414  14.7    34184   -1
                          0   1   1   103 184 68  52  3016    171 161 0.0534  12.4    15998   3
                          0   0   0   101 177 65  54  2765    164 121 0.0438  11.2    21105   0
                          0   1   0   90  169 65  52  2756    194 207 0.0751  13.8    34028   3
                          1   0   0   105 175 66  54  2700    134 72  0.0267  7.6 18344   0
                          0   0   0   108 187 68  57  3020    120 97  0.0321  12.4    11900   0
                          0   0   1   94  157 64  51  1967    90  68  0.0346  7.6 6229    1
                          0   1   0   95  169 64  53  2265    98  112 0.0494  9.0 9298    1
                          1   0   0   96  166 64  53  2275    110 56  0.0246  6.9 7898    0
                          0   1   0   100 177 66  53  2507    136 110 0.0439  12.4    15250   2
                          0   1   1   94  157 64  51  1876    90  68  0.0362  6.4 5572    1
                          0   0   0   95  170 64  54  2024    97  69  0.0341  7.6 7349    1
                          0   1   1   95  171 66  52  2823    152 154 0.0546  12.4    16500   1
                          0   0   0   103 175 65  60  2535    122 88  0.0347  9.8 8921    -1
                          0   0   0   113 200 70  53  4066    258 176 0.0433  15.7    32250   0
                          0   0   0   95  165 64  55  1938    97  69  0.0356  7.6 6849    1
                          1   0   0   97  172 66  56  2319    97  68  0.0293  6.4 9495    2
                          0   0   0   97  172 66  56  2275    109 85  0.0374  8.7 8495    2", header = TRUE)

#Run OP's code
library(pls)
library(ggplot2)
# Response: column 14 of df (symboling); predictors: the first 11 columns.
Y <- as.matrix(df[, 14])
X <- as.matrix(df[, 1:11])
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df.pls <- mvr(Y ~ X, ncomp = 3, method = "oscorespls", scale = TRUE)

#Extract information from mvr object
# Scores on the first two components, one row per observation.
df2 <- df.pls$scores
comp1a <- df2[, 1]
comp2a <- df2[, 2]
df2 <- as.data.frame(cbind(comp1a, comp2a))

# Loadings on the first two components, one row per predictor.
# The row names (used later as plot labels) come from the names carried by
# comp1/comp2, so the former zero-column `names <- df1[, 0]` helper, which
# also shadowed base::names, is not needed and has been removed.
df1 <- df.pls$loadings
comp1 <- df1[, 1]
comp2 <- df1[, 2]
df1 <- as.data.frame(cbind(comp1, comp2))

#Generate two plots and overlay
#Plot 1
# Loadings plot: each row of df1 is drawn as red text at (comp1, comp2), on a
# fixed-ratio panel spanning -0.6..0.6 on both axes, with no axis titles.
# NOTE(review): `size` in element_rect() is kept as written; recent ggplot2
# releases prefer `linewidth` for line widths - confirm against the installed
# version before changing it.
p1 <- ggplot(data = df1, mapping = aes(comp1, comp2)) +
  geom_text(mapping = aes(label = rownames(df1)), color = "red") +
  scale_x_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
  scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
  coord_fixed(xlim = c(-0.6, 0.6), ylim = c(-0.6, 0.6)) +
  labs(x = "", y = "", title = "X scores and X Loadings") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    # Red ticks mark this plot's axis as the loadings axis.
    axis.ticks = element_line(colour = "red"),
    # Rotated y labels plus extra margin, merged from two theme() calls.
    axis.text.y = element_text(
      angle = 90,
      hjust = 0.65,
      margin = margin(10, 10, 10, 5, "pt")
    )
  )

#Plot 2
# Scores plot: each row of df2 is drawn as text (its row name) at
# (comp1a, comp2a).
# The original chain also added xlim(-4, 4) + ylim(-4, 4) before the
# scale_*_continuous() calls. A later scale for the same aesthetic replaces
# the earlier one, so those limits never took effect and only produced
# "Scale ... is already present" messages; they are omitted here without
# changing the rendered plot. The visible y range is still set by
# coord_cartesian(); to pin the x range as well, add `xlim = c(-4, 4)` there.
# NOTE(review): `size` in element_rect() is kept as written; recent ggplot2
# releases prefer `linewidth` - confirm against the installed version.
p2 <- ggplot(data = df2, aes(comp1a, comp2a)) +
  ylab("Comp 2") + xlab("Comp 1") + ggtitle("X scores and X Loadings") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_text(aes(label = rownames(df2))) +
  scale_y_continuous(breaks = c(-4, -2, 0, 2)) +
  coord_cartesian(ylim = c(-4, 4)) +
  scale_x_continuous(breaks = c(-4, -2, 0, 2)) +
  theme(plot.title = element_text(face = "bold")) +
  theme(axis.text.y = element_text(angle = 90, hjust = 0.65))

#Function to overlay plots in order to get two graphs with different axes on same plot
library(grid)
library(gtable)

# Draw `plot2`'s panel on top of `plot1`'s panel and attach one axis taken
# from `plot2` on the opposite side of the combined figure.
#
# Arguments:
#   plot1      ggplot object used as the base; its gtable is the one extended.
#   plot2      ggplot object whose panel and axis are overlaid. Its panel
#              background is made transparent first.
#   which.axis "x" to take plot2's bottom axis and place it in a row above the
#              panel, "y" to take plot2's left axis and place it in a column
#              to the right. Any other value is now an error; previously it
#              fell through to a half-applied "y" layout.
#
# Side effects: starts a new grid page and draws the combined gtable on it.
# Returns the combined gtable invisibly so callers can redraw or save it.
#
# NOTE(review): the positional indices used below (children[[2]] of the axis
# grob, grobs[[6]] / grobs[[7]], and cell row 2 / column 4) depend on the
# gtable layout produced by the installed ggplot2 version - verify them when
# upgrading ggplot2.
ggplot_dual_axis <- function(plot1, plot2, which.axis = "x") {
  which.axis <- match.arg(which.axis, c("x", "y"))
  use_x <- which.axis == "x"

  # Update plot with transparent panel
  plot2 <- plot2 + theme(panel.background = element_rect(fill = NA))
  grid.newpage()
  # Increase right margin if which.axis == "y" to leave room for the new axis
  if (!use_x) {
    plot1 <- plot1 + theme(plot.margin = unit(c(0.7, 1.5, 0.4, 0.4), "cm"))
  }
  # Extract gtable
  g1 <- ggplot_gtable(ggplot_build(plot1))
  g2 <- ggplot_gtable(ggplot_build(plot2))
  # Overlap the panel of the second plot on that of the first.
  # `select` is spelled out (was the partial match `se`), and the right
  # extent now uses pp$r rather than repeating pp$l.
  pp <- c(subset(g1$layout, name == "panel", select = t:r))
  panel2 <- g2$grobs[[which(g2$layout$name == "panel")]]
  g <- gtable_add_grob(g1, panel2, pp$t, pp$l, pp$b, pp$r)

  # Steal axis from second plot and modify
  axis.lab <- if (use_x) "axis-b" else "axis-l"
  ia <- which(g2$layout$name == axis.lab)
  ga <- g2$grobs[[ia]]
  ax <- ga$children[[2]]
  # Switch position of ticks and labels, then shift one of the reversed
  # grobs by a full panel extent (1 npc) minus 0.15 cm
  if (use_x) {
    ax$heights <- rev(ax$heights)
    ax$grobs <- rev(ax$grobs)
    ax$grobs[[2]]$y <- ax$grobs[[2]]$y - unit(1, "npc") + unit(0.15, "cm")
    # Modify existing row to be tall enough for axis
    g$heights[[2]] <- g$heights[g2$layout[ia, ]$t]
  } else {
    ax$widths <- rev(ax$widths)
    ax$grobs <- rev(ax$grobs)
    ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")
  }

  # Add new row or column for axis label.
  # `g2$grobs` is spelled out (was the partial match `g2$grob`).
  if (use_x) {
    g <- gtable_add_grob(g, ax, 2, 4, 2, 4)
    g <- gtable_add_rows(g, g2$heights[1], 1)
    g <- gtable_add_grob(g, g2$grobs[[6]], 2, 4, 2, 4)
  } else {
    g <- gtable_add_cols(g, g2$widths[g2$layout[ia, ]$l], length(g$widths) - 1)
    g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)
    g <- gtable_add_grob(g, g2$grobs[[7]], pp$t, length(g$widths), pp$b - 1)
  }
  # Draw it
  grid.draw(g)
  invisible(g)
}

#Run function on individual plots
# The scores plot (p2) is the base; the loadings plot (p1) is overlaid and
# lends its y axis.
ggplot_dual_axis(plot1 = p2, plot2 = p1, which.axis = "y")

enter image description here


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...