
Before starting the program let me put something forward:
1. The idea is taken from Google, and the code was written with the help of various sources
2. You must have the libraries mentioned below installed
3. You need R 2.15.1 or above to run the following code
4. You need to have your own Twitter account
5. You can’t run the entire code in a single shot; you have to run it line by line
You can download the lists of positive and negative words from the links below:
Now you need to create a Twitter application:
1. Go to https://apps.twitter.com/
2. Create a new Twitter app
3. Fill the boxes accordingly and make it private
4. After completing the formalities you will see this kind of page
5. If you click on “Manage keys and access token”, you will get a ‘Consumer Key’ and a ‘Consumer Secret’
6. Save this information. We will use it later.
####################### TWITTER SENTIMENT ANALYSIS ###################
############# AUTHOR: DEEPESH SINGH
############# PURPOSE: TWITTER SENTIMENT ANALYSIS WITH KEYWORD #MODI
############# DATE: 12/20/2015

########################### MODULE 1 ################################
############# HANDSHAKING TWITTER AND GETTING THE DATA ##############

## SETTING WORKING DIRECTORY
## All relative file names used below (cacert.pem, Twitter_Auth.RData,
## ModiTwits.csv, ModiScore.csv, the word lists) are resolved against it.
path <- "E:/R Programs/Twitter Sentiment Analysis"
setwd(path)

## CALLING ALL REQUIRED LIBRARY
library(twitteR)      # twitter interface API
library(ROAuth)       # for authentication
library(plyr)         # to break big problem into smaller
library(stringr)      # make easier to work with strings
library(ggplot2)      # implementation of the grammar of graphics
library(RColorBrewer) # provide palettes for drawing
library(tm)           # tools for text mining
library(wordcloud)    # used to create word cloud
library(Rcpp)         # R and C++ integration (name is case-sensitive: "Rcpp")
library(RCurl)        # HTTP/FTP client interface for R

##
## Bundle of CA Root Certificates
##
## Certificate data from Mozilla as of: Wed Oct 28 04:12:04 2015
##
## This is a bundle of X.509 certificates of public Certificate Authorities
## (CA). These were automatically extracted from Mozilla's root certificates
## file (certdata.txt). This file can be found in the mozilla source tree:
## http://hg.mozilla.org/releases/mozilla-release/raw-file/default/security/nss/lib/ckfw/builtins/certdata.txt
##
## It contains the certificates in PEM format and therefore
## can be directly used with curl / libcurl / php_curl, or with
## an Apache+mod_ssl webserver for SSL client authentication.
## Just configure this file as the SSLCACertificateFile.
##
## Conversion done with mk-ca-bundle.pl version 1.25.
## SHA1: 6d7d2f0a4fae587e7431be191a081ac1257d300a
##

## DOWNLOAD FILE
## The bundle is the trust root for the later HTTPS calls, so it is fetched
## over HTTPS itself and stored under the name searchTwitter() is given below.
download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

## NOW WE WILL ACCESS THE TWITTER API WHICH WE HAVE JUST CREATED
## ALL DETAILS CAN BE FOUND FROM THE TWITTER APPLICATION PAGE
requestURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
## Placeholders: replace with the key/secret of your own Twitter application.
consumerKey <- "LMfRAhWF##############2a"
consumerSecret <- "06rghyRRxwLE4################HW2h63iPISeQkxCB4rf"

Cred <- OAuthFactory$new(
  consumerKey = consumerKey,
  consumerSecret = consumerSecret,
  requestURL = requestURL,
  accessURL = accessURL,
  authURL = authURL
)
Cred$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))

## AFTER RUNNING THIS LINE YOU WILL GET A URL WHICH YOU NEED TO PASTE TO YOUR
## WEB BROWSER TO GET A KEY
## IT WILL LOOK LIKE THIS MESSAGE
## To enable the connection, please direct your web browser to:
## https://api.twitter.com/oauth/authorize?oauth_token=iTftDgAAAAAAjWK9AAABUcCT6T8
## When complete, record the PIN given to you and provide it here: 7047697

## NOW WE WILL SAVE THIS ENVIRONMENT
save(Cred, file = "Twitter_Auth.RData")
load("Twitter_Auth.RData")
## NOTE(review): registerTwitterOAuth() belongs to older twitteR releases;
## newer releases appear to use setup_twitter_oauth() instead -- confirm
## against the installed twitteR version.
registerTwitterOAuth(Cred)
## THE CONSOLE OUTPUT WILL BE TRUE THAT MEANS WE HAVE GOT SUCCESSFUL HANDSHAKE

## NOW WE WILL TRY TO FETCH TWITS FROM TWITTER USING THE HASH TAG KEYWORD
## Re-download the certificate bundle only if it is not already on disk
## (e.g. when this part is run in a fresh session / directory).
if (!file.exists("cacert.pem")) {
  download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")
}

## HERE I AM EXTRACTING TOP 500 TWITS ON THIS TOPIC/KEYWORD
modi.list <- searchTwitter("#Modi", n = 500, cainfo = "cacert.pem")
modi.df <- twListToDF(modi.list)
write.csv(modi.df, "ModiTwits.csv", row.names = FALSE)

########################### MODULE 2 ################################
############# SENTIMENT ANALYSIS WITH THE ABOVE DATA ##############

## SENTIMENT SCORE = POSITIVE SCORE - NEGATIVE SCORE

# function score.sentiment
#
# Scores each sentence as (number of words found in pos.words) minus
# (number of words found in neg.words).
#
# Parameters
#   sentences: vector of text to score (character; a factor is also accepted)
#   pos.words: vector of words of positive sentiment
#   neg.words: vector of words of negative sentiment
#   .progress: passed to laply() to control the progress bar
#
# Returns
#   a data frame with one row per sentence and two columns:
#   text (the input as given) and score (the sentiment score)
score.sentiment <- function(sentences, pos.words, neg.words,
                            .progress = "none") {
  # Lower-case x; give NA instead of stopping when tolower() signals an
  # error. tolower() is evaluated only once.
  try.tolower <- function(x) {
    tryCatch(tolower(x), error = function(e) NA_character_)
  }

  # Score a single sentence.
  score.one <- function(sentence) {
    # remove punctuation
    sentence <- gsub("[[:punct:]]", "", sentence)
    # remove control characters
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    # remove digits
    sentence <- gsub("\\d+", "", sentence)
    sentence <- try.tolower(sentence)

    # split sentence into words with str_split (stringr package)
    words <- unlist(str_split(sentence, "\\s+"))

    # %in% gives TRUE/FALSE per word (never NA), so the sums are match counts
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  # Score on the character form so factor input is handled explicitly;
  # laply() (plyr) returns the scores as a simple array.
  scores <- laply(as.character(sentences), score.one, .progress = .progress)

  # data frame with scores for each sentence
  data.frame(text = sentences, score = scores)
}

# IMPORT POSITIVE AND NEGATIVE WORDS
# Lines starting with ";" in the word lists are treated as comments.
pos.words <- scan("positive_words.txt", what = "character", comment.char = ";")
neg.words <- scan("negative_words.txt", what = "character", comment.char = ";")

# Tweet text is free text, so keep it as character rather than a factor.
moditwit <- read.csv("ModiTwits.csv", sep = ",", stringsAsFactors = FALSE)
moditwit$text <- as.character(moditwit$text)
modi.score <- score.sentiment(moditwit$text, pos.words, neg.words,
                              .progress = "text")
write.csv(modi.score, "ModiScore.csv", row.names = FALSE)

## PLOTTING THE OUTCOME
hist(modi.score$score,
     xlab = "Score of Tweets",
     main = "Frequency of Twits sentiment")

########################### MODULE 3 ################################
############### TEXT MINING AND WORDCLOUND DRAWING IN R #############
library(tm)
library(wordcloud)

# moditwit <- path
# assigning the directory where only moditwit file is saved
modiTwitPath <- "E:/R Programs/Twitter Sentiment Analysis/TwitData"
# readerControl keys are lower-case ("language"); "en" is an IETF language tag.
moditwit <- Corpus(DirSource(modiTwitPath),
                   readerControl = list(language = "en"))

## DOING NORMAL DATA FORMATTING
# Plain base functions must be wrapped in content_transformer() so the
# documents keep their text-document class for TermDocumentMatrix().
moditwit <- tm_map(moditwit, content_transformer(tolower))
moditwit <- tm_map(moditwit, stripWhitespace)
moditwit <- tm_map(moditwit, removePunctuation)
# The text is already lower-cased at this point, so the search keyword has to
# be removed in its lower-case form.
moditwit <- tm_map(moditwit, removeWords, c("modi"))

tdm <- TermDocumentMatrix(moditwit)
m1 <- as.matrix(tdm)
v1 <- sort(rowSums(m1), decreasing = TRUE)
d1 <- data.frame(word = names(v1), freq = v1)
wordcloud(d1$word, d1$freq, colors = brewer.pal(8, "Set2"), min.freq = 1)

########################### THE END ################################
############ TWITTER SENTIMENT ANALYSIS CODE BY DEEPESH #############
References:
1. List of positive keywords – GitHub
2. List of negative keywords – GitHub
3. Dirk Eddelbuettel’s Rcpp library
4. Sentiment Score Analysis – Breen’s approach
5. Twitter Sentiment Analysis
6. Twitter Sentiment Analysis by Kaify