itellin
京东商城手机的价格好像在街上看见美女,只能看,不能摸。
在这个网址中http://list.jd.com/list.html?cat=9987,653,655
各个品牌的手机价格放在节点<div class="p-price">
<span id="p1057746"></span></div>中,因此爬取出来的只是价格编号,看不到价格。怎么找都找不到价格藏在哪里,是京东压根就不给看,还是我没有找到出处?
> url = "http://list.jd.com/list.html?cat=9987,653,655"
> price = url %>% html_session() %>% html_nodes(".p-price span") %>% html_attrs() %>% unlist %>% as.vector
> price
[1] "p1057746" "p1023433" "p1057741" "p1217524" "p1286133" "p1160548" "p981821" "p1165978"
[9] "p1023437" "p1199748" "p1124332" "p1217526" "p1033196" "p1086759" "p975628" "p1185017"
[17] "p1057740" "p1300779" "p1139519" "p1099153" "p1300419" "p1264715" "p1182292" "p1217525"
[25] "p975641" "p1260571" "p1160245" "p981822" "p1041685" "p1163613" "p1220064" "p855739"
[33] "p1301899" "p1185016" "p1411708283" "p1084276" "p1175898" "p1101144" "p1124365" "p1192180"
[41] "p1178704" "p917461" "p1220040" "p1023438" "p1241591" "p1256854" "p1299917" "p1058924"
[49] "p1124369" "p1101135" "p1058157" "p973861" "p1079888" "p919669" "p1106478" "p1169454"
[57] "p1178707" "p1138529" "p1220054" "p1157957"
zggjtsgzczh
http://p.3.cn/prices/mgets?skuIds=J_1057746,J_1023433,J_1057741,J_1217524,J_1286133,J_1160548,J_981821,J_1165978,J_1023437,J_1199748,J_1124332,J_1217526,J_1033196,J_1086759,J_975628,J_1185017,J_1057740,J_1300779,J_1139519,J_1099153,J_1300419,J_1264715,J_1182292,J_1217525,J_975641,J_1260571,J_1160245,J_981822,J_1041685,J_1163613,J_1220064,J_855739,J_1301899,J_1185016,J_1411708283,J_1084276,J_1175898,J_1101144,J_1124365,J_1192180,J_1178704,J_917461,J_1220040,J_1023438,J_1241591,J_1256854,J_1299917,J_1058924,J_1124369,J_1101135,J_1058157,J_973861,J_1079888,J_919669,J_1106478,J_1169454,J_1178707,J_1138529,J_1220054,J_1157957&type=1&area=1_72_4137&callback=jsonp1421128559216&_=1421128559650
itellin
经过楼上的高手指点,京东的手机价格总算爬下来了。
require(stringr)
require(rvest)
require(rjson)
# Scrape one page of the JD.com mobile-phone listing and return one row per
# product.
#
# Args:
#   x: Page number, inserted into the `page=` query parameter of the listing URL.
#   n: Number of products to take from the page. Defaults to 60, the value
#      this function previously hard-coded.
#
# Returns:
#   A data.frame with n rows and the columns 品牌 (brand), 型号 (model),
#   性能描述 (description), 评价人数 (review count) and 价格 (price).
#
# The listing HTML only carries a price id per product (e.g. "p1057746"),
# so the prices themselves are requested from the p.3.cn price endpoint
# using the matching "J_<id>" sku ids.
myfun <- function(x, n = 60) {
  url <- str_c(
    "http://list.jd.com/list.html?cat=9987%2C653%2C655&page=", x, "&JL=6_0_0"
  )
  # Download the listing once and reuse it for every selector, so names,
  # review counts and price ids all come from the same response.
  page <- html_session(url)

  brand <- page %>% html_nodes(".right-extra .p-name a") %>% html_text()
  if (length(brand) < n) {
    stop(
      "expected at least ", n, " product names on page ", x,
      " but found ", length(brand),
      call. = FALSE
    )
  }
  # Keep only the last n names (presumably the leading matches are not part
  # of the main product grid -- confirm against the page). tail() is used
  # instead of brand[-c(1:(length(brand) - n))] because `1:0` is c(1, 0),
  # which silently dropped the first product when exactly n names matched.
  b <- strsplit(tail(brand, n), " ")
  # First token = brand, second token = model, remaining tokens = description.
  b1 <- vapply(b, function(parts) paste(parts[1], collapse = ""), character(1))
  b2 <- vapply(b, function(parts) paste(parts[2], collapse = ""), character(1))
  b3 <- vapply(
    b,
    function(parts) paste(parts[-c(1:2)], collapse = ""),
    character(1)
  )

  # Leading run of digits in each review link's text.
  evalue <- page %>%
    html_nodes(".evaluate a") %>%
    html_text() %>%
    str_extract("\\d+")

  # Turn the span attribute values ("p1057746") into sku ids ("J_1057746").
  sku_ids <- page %>%
    html_nodes(".p-price span") %>%
    html_attrs() %>%
    unlist %>%
    as.vector %>%
    str_replace("p", "J_")
  u <- str_c("http://p.3.cn/prices/mgets?skuIds=", paste(sku_ids, collapse = ","))

  # The endpoint answers with bare JSON; the HTML parser wraps that text in
  # a <p> node, whose text is then handed to the JSON parser.
  dat <- u %>% html() %>% html_nodes("p") %>% html_text()
  p <- fromJSON(dat)
  price <- vapply(
    p[seq_len(n)],
    function(item) item[["p"]],
    character(1),
    USE.NAMES = FALSE
  )

  data.frame(品牌 = b1, 型号 = b2, 性能描述 = b3, 评价人数 = evalue, 价格 = price)
}
# Scrape listing pages 1 through 4 (the listing has 44 pages in total).
dat <- lapply(seq_len(4), myfun)
# Stack the per-page data frames into one table.
total <- do.call(rbind, dat)
> tail(total)
品牌 型号 性能描述 评价人数 价格
235 IUNI U2 32GB冰峰银联通3G手机 1723 1199.00
236 诺基亚(NOKIA) Lumia 830(RM-984)黑色联通3G手机 734 1699.00
237 华为 C8817E 黑电信4G手机 467 799.00
238 TCL 老人手机 (i310)纯净白移动联通2G手机 10970 199.00
239 联想 黄金斗士 A8(A808t-i)8G阿尔卑斯白移动4G手机 743 698.00
240 IUNI U3 32GB墨池黑移动联通4G手机双卡双待 1056 2000.00