今天突发奇想,用 R 来画分子结构式。由于跟统计图形不沾边,就不往“统计图形与数据可视化”的板块发了,于是还发到这里,当作又一个不务正业的 R 程序。至于画什么东西呢,就画当下正火热的——治疗甲型流感的药物之一:达菲(分子结构式参考了
http://en.wikipedia.org/wiki/Oseltamivir)。
简单整理了一下代码,把零散的变量和函数都放到draw_molecule内部去了(免得污染工作空间):
<br />
init_screen <- function(width = 1, height = 1, mar = rep(0, 4), bg = par("bg"), ...) {<br />
par(mar = mar)<br />
par(bg = bg)<br />
plot(c(0, width), c(0, height), type = "n", xlab = "", ylab = "", axes = FALSE, ...)<br />
}<br />
<br />
draw_molecule <- function(molecule, x = 0, y = 0, w = 0, h = 0, text = "", text_col = "blue", text_bgcol = par("bg")) {<br />
<br />
directions <- c('up', 'upright', 'right', 'downright', 'down', 'downleft', 'left', 'upleft')<br />
<br />
moving <- list(<br />
'up' = list(x = 0, y = 2),<br />
'upright' = list(x = sqrt(3), y = 1),<br />
'right' = list(x = 2 * sqrt(3), y = 0),<br />
'downright' = list(x = sqrt(3), y = -1),<br />
'down' = list(x = 0, y = -2),<br />
'downleft' = list(x = -sqrt(3), y = -1),<br />
'left' = list(x = -2 * sqrt(3), y = 0),<br />
'upleft' = list(x = -sqrt(3), y = 1)<br />
)<br />
<br />
calc_bond_pos <- function(pos, bond) {<br />
x1 <- pos[1]<br />
y1 <- pos[2]<br />
x2 <- x1 + moving[[bond$direction]]$x<br />
y2 <- y1 + moving[[bond$direction]]$y<br />
return(c(x2, y2))<br />
}<br />
<br />
draw_bond <- function(pos1, pos2, bond) {<br />
x1 <- pos1[1]<br />
y1 <- pos1[2]<br />
x2 <- pos2[1]<br />
y2 <- pos2[2]<br />
if (bond$type == "single") {<br />
segments(x1, y1, x2, y2)<br />
} else if (bond$type == "double") {<br />
dx = (y1 - y2) / 15<br />
dy = (x2 - x1) / 15<br />
segments(x1 + dx, y1 + dy, x2 + dx, y2 + dy)<br />
dx = -dx<br />
dy = -dy<br />
segments(x1 + dx, y1 + dy, x2 + dx, y2 + dy)<br />
} else if (bond$type == "triple") {<br />
segments(x1, y1, x2, y2)<br />
dx = (y1 - y2) / 8<br />
dy = (x2 - x1) / 8<br />
segments(x1 + dx, y1 + dy, x2 + dx, y2 + dy)<br />
dx = -dx<br />
dy = -dy<br />
segments(x1 + dx, y1 + dy, x2 + dx, y2 + dy)<br />
}<br />
return(c(x2, y2))<br />
}<br />
<br />
if (length(molecule$atoms) > 0) {<br />
pos <- matrix(0, ncol = 2, nrow = length(molecule$atoms))<br />
for (i in seq_along(molecule$atoms)) {<br />
bonds <- matrix(sapply(molecule$bonds, function(x) sort(c(x$start, x$end))), ncol = 2, byrow = TRUE)<br />
for (j in seq_along(molecule$bonds)) {<br />
if (i == bonds[j, 1]) {<br />
pos[bonds[j, 2],] <- calc_bond_pos(pos[bonds[j, 1],], molecule$bonds[[j]])<br />
}<br />
}<br />
}<br />
pos[,1] <- pos[,1] - min(pos[,1]) + x<br />
pos[,2] <- pos[,2] - max(pos[,2]) + y<br />
if (w > 0) {<br />
pos[,1] <- pos[,1] + (w - (max(pos[,1]) - min(pos[,1]))) / 2<br />
}<br />
if (h > 0) {<br />
pos[,2] <- pos[,2] - (h - (max(pos[,2]) - min(pos[,2]))) / 2<br />
if ( ! is.null(text) && text != "") {<br />
pos[,2] <- pos[,2] + 3<br />
}<br />
}<br />
<br />
for (i in seq_along(molecule$bonds)) {<br />
draw_bond(pos[molecule$bonds[[i]]$start,], pos[molecule$bonds[[i]]$end,], molecule$bonds[[i]])<br />
}<br />
fontSize = 0.6;<br />
for (i in seq_along(molecule$atoms)) {<br />
if (molecule$atoms[i] != "C") {<br />
rect(pos[i, 1] - fontSize, pos[i, 2] - fontSize, pos[i, 1] + fontSize, pos[i, 2] + fontSize, col = text_bgcol, border = text_bgcol)<br />
text(pos[i, 1], pos[i, 2], molecule$atoms[i])<br />
}<br />
}<br />
<br />
if ( ! is.null(text) && text != "") {<br />
text_x <- x<br />
text_y <- min(pos[,2]) - 1<br />
text_pos <- 1<br />
if (w > 0) {<br />
text_x <- x + w / 2<br />
}<br />
if (h > 0) {<br />
text_y <- y - h<br />
text_pos <- 3<br />
}<br />
text(text_x, text_y, text, col = text_col, pos = text_pos)<br />
}<br />
}<br />
}<br />
<br />
init_screen(40, 40, bg = "cornsilk")<br />
<br />
oseltamivir <- list(<br />
atoms = c(rep('C', 6), 'N', 'C', 'C', 'O', 'O', rep('C', 5), 'C', 'O', 'O', 'C', 'C', 'N'),<br />
bonds = list(<br />
list(start = 1, end = 2, direction = 'downright', type = 'single'),<br />
list(start = 2, end = 3, direction = 'down', type = 'single'),<br />
list(start = 3, end = 4, direction = 'downleft', type = 'double'),<br />
list(start = 4, end = 5, direction = 'upleft', type = 'single'),<br />
list(start = 5, end = 6, direction = 'up', type = 'single'),<br />
list(start = 6, end = 1, direction = 'upright', type = 'single'),<br />
list(start = 1, end = 7, direction = 'up', type = 'single'),<br />
list(start = 7, end = 8, direction = 'upright', type = 'single'),<br />
list(start = 8, end = 9, direction = 'downright', type = 'single'),<br />
list(start = 8, end = 10, direction = 'up', type = 'double'),<br />
list(start = 2, end = 11, direction = 'upright', type = 'single'),<br />
list(start = 11, end = 12, direction = 'downright', type = 'single'),<br />
list(start = 12, end = 13, direction = 'upright', type = 'single'),<br />
list(start = 13, end = 14, direction = 'downright', type = 'single'),<br />
list(start = 12, end = 15, direction = 'down', type = 'single'),<br />
list(start = 15, end = 16, direction = 'downright', type = 'single'),<br />
list(start = 4, end = 17, direction = 'down', type = 'single'),<br />
list(start = 17, end = 18, direction = 'downright', type = 'double'),<br />
list(start = 17, end = 19, direction = 'downleft', type = 'single'),<br />
list(start = 19, end = 20, direction = 'down', type = 'single'),<br />
list(start = 20, end = 21, direction = 'downleft', type = 'single'),<br />
list(start = 6, end = 22, direction = 'upleft', type = 'single')<br />
)<br />
)<br />
draw_molecule(oseltamivir, 10, 30, 20, 20, text = "达菲")<br />
上面的代码方便大家拷贝粘贴运行。更多关于氨基酸的代码放附件中了,有兴趣的下载跑跑看吧(使用:source("DrawAminoAcids.R"))。