在X乎上看到了一段爬虫代码,希望重现作者的运行结果,但运行时出现错误,无法获取数据。
library(rvest)
library(stringr)
# Scrape ten pages of job search results from sou.zhaopin.com and show the
# listings (title, company, salary, address, date, link) in an interactive
# table.

# Search-result URL. The query parameters are URL-encoded: `jl` decodes to
# "上海" and `kw` to "数据分析". The page number is appended after the
# trailing "p=".
url1 <- "http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%e4%b8%8a%e6%b5%b7&kw=%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90&isadv=0&sg=d916c075b66b48249d6dba31c11b39e0&p="

# Number of result pages to request.
n_pages <- 10L

# Scrape a single result page.
#
# page_url: full URL of one result page.
# Returns a named list of equally long character vectors (zhiwei, company,
# salary, adress, time, zwhref), or NULL (with a warning) when the page
# cannot be read or the extracted fields do not line up.
scrape_page <- function(page_url) {
  # Error originally reported when running this script:
  #   error in open.connection(x, "rb") :
  #   Failed writing received data to disk/application
  # A failed request is turned into a warning so that one bad page does not
  # discard the pages that were fetched successfully.
  web <- tryCatch(
    read_html(page_url),
    error = function(e) {
      warning("Failed to read ", page_url, ": ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(web)) {
    return(NULL)
  }

  # The same anchors supply both the job title and the job link.
  title_links <- web %>% html_nodes("td.zwmc") %>% html_nodes("a")
  zhiwei1 <- html_text(title_links)
  zhiwei_href <- html_attr(title_links, "href")
  company1 <- web %>%
    html_nodes("td.gsmc") %>%
    html_nodes("a") %>%
    html_text()

  fields <- list(
    # Drop anchors with empty text.
    zhiwei = zhiwei1[str_length(zhiwei1) != 0],
    company = company1[str_length(company1) != 0],
    salary = web %>% html_nodes("td.zwyx") %>% html_text(),
    adress = web %>% html_nodes("td.gzdd") %>% html_text(),
    time = web %>%
      html_nodes("td.gxsj") %>%
      html_nodes("span") %>%
      html_text(),
    # Keep only hrefs longer than 40 characters. Anchors without an href
    # give NA, which would otherwise be carried through as NA entries by
    # logical indexing.
    zwhref = zhiwei_href[!is.na(zhiwei_href) & str_length(zhiwei_href) > 40]
  )

  # Every field must describe the same rows; otherwise data.frame() would
  # fail or silently recycle/misalign the columns.
  field_lengths <- lengths(fields)
  if (length(unique(field_lengths)) != 1L) {
    warning(
      "Skipping ", page_url, ": field lengths differ (",
      paste(names(field_lengths), field_lengths, sep = "=", collapse = ", "),
      ")",
      call. = FALSE
    )
    return(NULL)
  }

  fields
}

# One entry per requested page; NULL for pages that were skipped.
pages <- lapply(paste0(url1, seq_len(n_pages)), scrape_page)

# Concatenate one field across all pages (skipped pages contribute nothing).
collect_field <- function(field) {
  as.character(unlist(lapply(pages, `[[`, field), use.names = FALSE))
}

zhiwei <- collect_field("zhiwei")
company <- collect_field("company")
salary <- collect_field("salary")
adress <- collect_field("adress")
time <- collect_field("time")
zwhref <- collect_field("zwhref")

zhiliang <- data.frame(
  职位 = zhiwei, 公司 = company, 月薪 = salary,
  地址 = adress, 日期 = time, 链接 = zwhref
)
DT::datatable(zhiliang)