I'm writing a crawler in R that scrapes data from the URLs listed in S.txt, but I'm hitting the error shown in the screenshot below. Is there any way to fix it? Thanks a lot to anyone who has time to take a look!
Image: (screenshot of the error)

S.TXT
https://s8.aconvert.com/convert/p3r68-cdx67/cbtqo-95cs8.html
R Code
https://drive.google.com/file/d/1jUBlVOXClqUul3rx8j7dcYgz7mW8yE23/view?usp=sharing
https://drive.google.com/file/d/16PW6auvohoOHOOt4DqyhQ0CyEyev3S-M/view?usp=sharing
library(rvest)
library(RCurl)
library(xml2)
library(xlsx)
result <- list()
# Read the list of patent URLs (one URL per line, single column)
surl <- read.table("C:\\Users\\user\\Desktop\\PeiHua\\SPI三大類\\SPI專利網址\\S.txt", header=FALSE, sep="", stringsAsFactors=FALSE)
names(surl) <- c("S類網址")

for(i in seq_len(nrow(surl))){
  # Download and parse one patent page
  shtml <- read_html(surl[i, 1])
  # Publication date cell in the bibliographic table
  sdate <- shtml %>% html_nodes("tr:nth-child(4) .single-patent-bibdata") %>% html_text()
  # NOTE: `patent.site` and `full.site` are never created anywhere in this script,
  # so R stops here with "object 'patent.site' not found". Either extract the links
  # first (e.g. html_nodes() on the link selector followed by html_attr("href"))
  # or drop this block; initialising them as empty vectors keeps the loop runnable:
  patent.site <- character(0)
  full.site <- character(0)
  if(!identical(patent.site, character(0))){
    for(p in seq_along(patent.site)){
      if(nchar(patent.site[p]) > 14){
        # Relative patent links need the domain prefixed to become full URLs
        full.site[p] <- paste0("https://www.google.com.tw", patent.site[p])
      }
    }
  }
  # Title, claims and abstract of the patent
  stitle <- shtml %>% html_nodes("invention-title") %>% html_text()
  sclaims <- shtml %>% html_nodes("div.claims") %>% html_text()
  sclaims <- gsub("^\\s+|\\s+$", "", sclaims)   # trim leading/trailing whitespace
  sclaims <- gsub("\\n", "", sclaims)           # drop embedded newlines
  sabstract <- shtml %>% html_nodes("div#p-0001.abstract") %>% html_text()
  if(identical(sabstract, character(0))){
    # Fall back to the generic abstract class when the id-based selector misses
    sabstract <- shtml %>% html_nodes(".abstract") %>% html_text()
  }
  # Collapse every field to exactly one string (NA if nothing was scraped);
  # otherwise a missing or repeated node breaks the 5-column matrix fill below
  fields <- list(sdate, stitle, sclaims, sabstract, surl[i, 1])
  result[[i]] <- vapply(fields, function(x) if(length(x) == 0) NA_character_ else paste(x, collapse=" "), character(1))
}
# Assemble the results into a 5-column table and export to CSV
result_T <- matrix(ncol=5, nrow=length(result), byrow=TRUE)
colnames(result_T) <- c("發佈日期","標題","聲明所有權","摘要","網址")
for(n in seq_along(result)){
  result_T[n, ] <- result[[n]]
}
write.csv(result_T, "S類.csv")
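
Since the screenshot didn't come through, it's hard to tell which URL triggers the failure. One way to narrow it down is to wrap the read_html() call in tryCatch() so the loop reports the offending row and keeps going instead of stopping. This is only a diagnostic sketch, reusing the surl table read in by the script above:

# Diagnostic sketch: find out which URL (if any) makes read_html() fail.
# Assumes `surl` has already been read in as in the script above.
library(rvest)

for(i in seq_len(nrow(surl))){
  page <- tryCatch(
    read_html(surl[i, 1]),
    error = function(e){
      # Report the row number, the URL and R's error message, then continue
      message("Row ", i, " failed: ", surl[i, 1])
      message("  ", conditionMessage(e))
      NULL
    }
  )
  if(is.null(page)) next   # skip this URL and move on to the next one
  # ... the normal scraping of sdate / stitle / sclaims / sabstract goes here ...
}

If every URL parses cleanly, the error is coming from a later step in the loop instead, most likely the undefined patent.site block or the 5-column row assembly noted in the comments above.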