# Author: tmlab / Date: 2016. 10. 17. 18:59 / Category: Text Mining/R
# Load the packages this script relies on: readxl for the Excel input and
# stringr for the pattern counting done further down.
library(readxl)
library(stringr)

# Read the positive and negative word lists. With its default separator,
# scan() splits on whitespace, so every whitespace-separated token in a file
# becomes one dictionary entry.
pos.dic <- scan(file = "pos_dic.txt", what = "character")
neg.dic <- scan(file = "neg_dic.txt", what = "character")
head(pos.dic)

# Read the first sheet of the workbook and show its structure.
blog <- read_excel(path = "after_POS.xlsx", sheet = 1)
str(blog)
# Count, for every text in `texts`, how many matches the entries of `dic`
# produce in total. Returns a numeric vector with one total per text.
count_dic_matches <- function(texts, dic) {
  # Start from one zero per text so that an empty dictionary still yields a
  # vector of the right length.
  totals <- numeric(length(texts))
  for (word in dic) {
    # NOTE(review): str_count() interprets `word` as a regular expression, so
    # entries containing characters such as "(" or "+" are not matched
    # literally. Wrap `word` in fixed() if literal matching is intended --
    # confirm against the dictionary files.
    totals <- totals + str_count(texts, word)
  }
  totals
}

# Per-article totals of positive and negative dictionary matches.
pos <- count_dic_matches(blog$article_POS, pos.dic)
neg <- count_dic_matches(blog$article_POS, neg.dic)
# Raw sentiment score per article: positive matches minus negative matches.
score <- pos - neg
blog$sent_score <- score

# Normalised score: the raw score divided by the total number of matches.
# Articles with no matches at all give 0 / 0 = NaN, which is.na() reports as
# missing; those rows are listed at the end of this section.
tot <- pos + neg
blog$sent <- score / tot
str(blog)

# Counts based on the normalised score; articles without any dictionary match
# (NaN) are left out of both counts.
cat("PROS: ", sum(blog$sent > 0, na.rm = TRUE), "\n")
cat("CONS: ", sum(blog$sent <= 0, na.rm = TRUE), "\n")

# Counts based on the raw score; here articles without any match have a score
# of 0 and are therefore counted as CONS.
cat("PROS: ", sum(blog$sent_score > 0, na.rm = TRUE), "\n")
cat("CONS: ", sum(blog$sent_score <= 0, na.rm = TRUE), "\n")

# Articles whose normalised score is missing.
blog[is.na(blog$sent), ]