Twitter Sentiment Analysis using Python and NLTK

This Python program lets you fetch tweets from Twitter and determine the sentiment towards a given person, product, or topic:



What is sentiment analysis?
Sentiment analysis is the process of computationally determining whether a piece of writing is positive or negative. It is also known as opinion mining: deriving the opinion or attitude of a speaker or writer.
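
As a quick illustration of the idea (using NLTK's built-in VADER analyzer, which is separate from the classifier ensemble trained below):

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')  # one-time download of the VADER lexicon
sia = SentimentIntensityAnalyzer()

# the 'compound' score ranges from -1 (most negative) to +1 (most positive)
print(sia.polarity_scores("I love this phone!"))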

Why sentiment analysis?
  • Business: In the marketing field, companies use sentiment analysis to shape strategy: to understand customers' feelings towards products or a brand, how people respond to campaigns or product launches, and why consumers don't buy certain products.
  • Politics: In the political field, it is used to track political views and to detect consistency or inconsistency between statements and actions at the government level. It can be used to predict election results as well!
  • Public actions: Sentiment analysis is also used to monitor and analyse social phenomena, to spot potentially dangerous situations, and to gauge the general mood of the blogosphere.

How can I fetch tweets from Twitter?

To fetch tweets through the Twitter API, you need to register an app with your Twitter account. Follow these steps:


  • Open this link and click the ‘Create New App’ button.
  • Fill in the application details. You can leave the callback URL field empty.
  • Once the app is created, you will be redirected to the app page.
  • Open the ‘Keys and Access Tokens’ tab.
  • Copy ‘Consumer Key’, ‘Consumer Secret’, ‘Access token’ and ‘Access Token Secret’.
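
Before streaming, it helps to confirm the credentials actually work. A minimal check (a sketch using tweepy; fill in the placeholder strings with your own keys):

import tweepy

auth = tweepy.OAuthHandler("your-consumer-key", "your-consumer-secret")
auth.set_access_token("your-access-token", "your-access-token-secret")
api = tweepy.API(auth)

# raises an error if the keys are invalid; otherwise prints your handle
print(api.verify_credentials().screen_name)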

Code:

*Train.py* - For Training the Model

# import all needed packages
import nltk
import random
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import LinearSVC, NuSVC
from nltk.classify import ClassifierI
from statistics import mode

# create a class which extends the ClassifierI class
# this class combines the votes of all the classifiers
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    # classify method: returns the most frequently occurring vote
    # note: statistics.mode raises StatisticsError on a tie in Python < 3.8
    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)

    # confidence method: returns the confidence of that vote
    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf
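
# e.g. if the five classifiers vote ['pos', 'pos', 'pos', 'neg', 'neg'],
# classify() returns 'pos' and confidence() returns 3/5 = 0.6
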
# read csv files for training
short_pos = pd.read_csv("processedPositive.csv")
short_neg = pd.read_csv("processedNegative.csv")

# read txt files for training
short_pos2 = open("positive_reviews.txt", "r").read()
short_neg2 = open("negative_reviews.txt", "r").read()

# create a list of labelled training documents
docs = []

# create strings to collect the positive and negative text for tokenizing
s_pos = " "
s_neg = " "

# append each review with its label to the docs list
# and collect the raw text for tokenizing
# note: iterating over a pandas DataFrame yields its column labels,
# which only works here if the CSV stores one tweet per column
for r in short_pos:
    docs.append((r, "pos"))
    s_pos = s_pos + " " + r

for r in short_neg:
    docs.append((r, "neg"))
    s_neg = s_neg + " " + r

for r in short_pos2.split('\n'):
    docs.append((r, "pos"))
    s_pos = s_pos + " " + r

for r in short_neg2.split('\n'):
    docs.append((r, "neg"))
    s_neg = s_neg + " " + r

# pickle docs for reuse, to save time later
docs_f = open("documents.pickle", "wb")
pickle.dump(docs, docs_f)
docs_f.close()

# create an empty list to store all words
all_words = []

# tokenize the positive and negative review strings into two separate lists
short_pos_words = word_tokenize(s_pos)
short_neg_words = word_tokenize(s_neg)

# append all words from these lists to all_words, lowercased
for w in short_pos_words:
    all_words.append(w.lower())

for w in short_neg_words:
    all_words.append(w.lower())

# a frequency distribution records the number of times each word appears
all_words = nltk.FreqDist(all_words)

# use the 6,000 most common words as features
# (FreqDist keys are not sorted by frequency, so take most_common instead)
word_feature = [w for (w, c) in all_words.most_common(6000)]

# pickle word_feature
word_feature_f = open("word_features.pickle", "wb")
pickle.dump(word_feature, word_feature_f)
word_feature_f.close()

# create a function which returns a dictionary of features
def find_features(doc):
    words = word_tokenize(doc)
    features = {}
    for w in word_feature:
        features[w] = (w in words)

    return features
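
# example (hypothetical input): find_features("a good movie") returns a dict with
# one boolean per feature word, e.g. {"good": True, "movie": True, "bad": False, ...}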

# create the featureset: a list of (feature dict, category) tuples
featureset = [(find_features(rev), category) for (rev, category) in docs]

# pickle featureset
featureset_f = open("featureset.pickle", "wb")
pickle.dump(featureset, featureset_f)
featureset_f.close()

# randomly shuffle the featureset
random.shuffle(featureset)

# create training and testing data
training_set = featureset[:12000]
testing_set = featureset[12000:]

# create naive bayes classifier and train using training set
classifier = nltk.NaiveBayesClassifier.train(training_set)

#pickle naive bayes classifier
classifier_f = open("naivebayes.pickle", "wb")
pickle.dump(classifier, classifier_f)
classifier_f.close()

#print accuracy of naive bayes classifier
print("Naive Bayes Accuracy :",(nltk.classify.accuracy(classifier, testing_set))*100)

# print the 15 most informative features
classifier.show_most_informative_features(15)

# create Multinomial naive bayes classifier and train using training set
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)

#pickle Multinomial naive bayes classifier
saveMNB_classifier = open("MNBclassifier_features.pickle", "wb")
pickle.dump(MNB_classifier, saveMNB_classifier)
saveMNB_classifier.close()

#print accuracy of Multinomial naive bayes classifier
print("MNB_classifier Accuracy :",(nltk.classify.accuracy(MNB_classifier, testing_set))*100)

# create Bernoulli naive bayes classifier and train using training set
BNB_classifier = SklearnClassifier(BernoulliNB())
BNB_classifier.train(training_set)

#pickle Bernoulli naive bayes classifier
saveBNB_classifier = open("BNBclassifier_features.pickle", "wb")
pickle.dump(BNB_classifier, saveBNB_classifier)
saveBNB_classifier.close()

#print accuracy of Bernoulli naive bayes classifier
print("BernoulliNB_classifier Accuracy :",(nltk.classify.accuracy(BNB_classifier, testing_set))*100)

# create LogisticRegression classifier and train using training set
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)

#pickle LogisticRegression classifier
saveLOG_classifier = open("LogisticRegression_classifier_features.pickle", "wb")
pickle.dump(LogisticRegression_classifier, saveLOG_classifier)
saveLOG_classifier.close()

#print accuracy of LogisticRegression classifier
print("LogisticRegression_classifier Accuracy :",(nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100)

# create SGD classifier and train using training set
SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)

#pickle SGD classifier
saveSGDC_classifier = open("SGDclassifier_features.pickle", "wb")
pickle.dump(SGDClassifier_classifier, saveSGDC_classifier)
saveSGDC_classifier.close()

#print accuracy of SGD classifier
print("SGDClassifier_classifier Accuracy :",(nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100)

# create LinearSVC classifier and train using training set
LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)

#pickle LinearSVC classifier
saveLSVC_classifier = open("LSVCclassifier_features.pickle", "wb")
pickle.dump(LinearSVC_classifier, saveLSVC_classifier)
saveLSVC_classifier.close()

#print accuracy of LinearSVC classifier
print("LinearSVC_classifier Accuracy :",(nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)

# create NuSVC classifier and train using training set
NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)

#pickle NuSVC classifier
saveNuSVC_classifier = open("NuSVCclassifier_features.pickle", "wb")
pickle.dump(NuSVC_classifier, saveNuSVC_classifier)
saveNuSVC_classifier.close()

#print accuracy of NuSVC classifier
print("NuSVC_classifier Accuracy :",(nltk.classify.accuracy(NuSVC_classifier, testing_set))*100)

# create voted_classifier, an instance of the VoteClassifier class
voted_classifier = VoteClassifier(MNB_classifier, BNB_classifier, LogisticRegression_classifier, LinearSVC_classifier, NuSVC_classifier)

# print accuracy of voted_classifier
print("Voted_classifier Accuracy :",(nltk.classify.accuracy(voted_classifier, testing_set))*100)



*Sentiment_mod.py* - Create a Function to Analyze Sentiment

# import all needed packages 
import nltk
from nltk.tokenize import word_tokenize
import pickle
from nltk.classify import ClassifierI
from statistics import mode

# create a class which extends the ClassifierI class
# this class combines the votes of all the classifiers
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    # classify method: returns the most frequently occurring vote
    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)

    # confidence method: returns the confidence of that vote
    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf

# load pickled docs
docs_f = open("documents.pickle", "rb")
docs = pickle.load(docs_f)
docs_f.close()

# load pickled word_feature
word_feature_f = open("word_features.pickle", "rb")
word_feature = pickle.load(word_feature_f)
word_feature_f.close()

# create a function which returns a dictionary of features
def find_features(doc):
    words = word_tokenize(doc)
    features = {}
    for w in word_feature:
        features[w] = (w in words)

    return features


# load pickled featureset
featureset_f = open("featureset.pickle", "rb")
featureset = pickle.load(featureset_f)
featureset_f.close()


# load pickled naive bayes classifier
classifier_f = open("naivebayes.pickle", "rb")
classifier = pickle.load(classifier_f)
classifier_f.close()


# load pickled Multinomial naive bayes classifier
saveMNB_classifier = open("MNBclassifier_features.pickle", "rb")
MNB_classifier = pickle.load(saveMNB_classifier)
saveMNB_classifier.close()


# load pickled Bernoulli naive bayes classifier
saveBNB_classifier = open("BNBclassifier_features.pickle", "rb")
BNB_classifier = pickle.load(saveBNB_classifier)
saveBNB_classifier.close()


# load pickled LogisticRegression classifier
saveLOG_classifier = open("LogisticRegression_classifier_features.pickle", "rb")
LogisticRegression_classifier = pickle.load(saveLOG_classifier)
saveLOG_classifier.close()


# load pickled SGD classifier
saveSGDC_classifier = open("SGDclassifier_features.pickle", "rb")
SGDClassifier_classifier = pickle.load(saveSGDC_classifier)
saveSGDC_classifier.close()


# load pickled LinearSVC classifier
saveLSVC_classifier = open("LSVCclassifier_features.pickle", "rb")
LinearSVC_classifier = pickle.load(saveLSVC_classifier)
saveLSVC_classifier.close()


# load pickled NuSVC classifier
saveNuSVC_classifier = open("NuSVCclassifier_features.pickle", "rb")
NuSVC_classifier = pickle.load(saveNuSVC_classifier)
saveNuSVC_classifier.close()


# create voted_classifier, an instance of the VoteClassifier class
voted_classifier = VoteClassifier(MNB_classifier, LogisticRegression_classifier, SGDClassifier_classifier, LinearSVC_classifier, NuSVC_classifier)

# create a function to find the sentiment of some text
# it returns the winning vote and its confidence
def sentiment(text):
    feats = find_features(text)

    return voted_classifier.classify(feats), voted_classifier.confidence(feats)
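
For example (illustrative input; the exact output depends on your trained models):

import sentiment_mod as s

# returns (vote, confidence), e.g. ('pos', 1.0) when all five classifiers agree
print(s.sentiment("This movie was awesome! The acting was great."))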



*twitter_sentiment_analysis.py* - Fetch Data from Twitter and Run Sentiment Analysis

# import needed packages
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import json
import sentiment_mod as s

# open the Twitter developer website and create a new app
# after creating the app, open its details page
# then go to 'Keys and Tokens' and copy the keys and tokens:
# consumer key, consumer secret, access token, access token secret
ckey = "paste your api key"
csecret = "paste your api secret key"
atoken = "paste your access token"
asecret = "paste your access token secret"

# create a listener class which extends the StreamListener class
class listener(StreamListener):

    # this method is called for each incoming tweet
    def on_data(self, data):
        try:
            # parse the data from twitter using json
            all_data = json.loads(data)

            # store the text of the tweet
            tweet = all_data["text"]

            # find the vote and confidence using the sentiment function
            sentiment_value, confidence = s.sentiment(tweet)

            # print the tweet, vote, and confidence
            print(tweet, sentiment_value, confidence)

            # keep only results where confidence is at least 80%
            if confidence*100 >= 80:
                # open the output file in append mode
                output = open("narendra_modi.txt", "a")

                # write sentiment_value (the vote) to the file
                output.write(sentiment_value)
                output.write("\n")

                # close the file
                output.close()

            return True
        except:
            return True

    # report any errors that occur
    def on_error(self, status):
        print(status)

# authenticate app keys and tokens
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

# create twitterStream with app info and listener class
twitterStream = Stream(auth, listener())

# filter the stream for tweets matching a keyword
twitterStream.filter(track=["narendra modi"])
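
Note: the StreamListener pattern above targets tweepy 3.x. In tweepy 4.x StreamListener was removed and you subclass tweepy.Stream directly; a rough equivalent looks like this (a sketch, not a drop-in tested replacement):

import tweepy

class Listener(tweepy.Stream):
    def on_data(self, data):
        ...  # same body as the on_data method above

stream = Listener(ckey, csecret, atoken, asecret)
stream.filter(track=["narendra modi"])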



*live_graph_twitter.py* - Live Graph of Tweet Sentiment Analysis

# import needed packages
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style

# set style of graph
style.use('fivethirtyeight')

# create fig with display name
fig = plt.figure("Twitter Sentiment for Narendra Modi")

# add subplot in fig
ax1 = fig.add_subplot(1,1,1)

# create the animate function, called on every timer tick
def animate(i):

    # read the data for the graph from the txt file
    graph_data = open('narendra_modi.txt', 'r').read()

    # split the data on newlines
    lines = graph_data.split("\n")

    # create two empty lists
    xar = []
    yar = []

    # create two counters
    x = 0
    y = 0

    # for each line in lines
    for l in lines:

        # increase x by 1 for every line
        x += 1

        # if "pos" is in the line, increase y by 1
        if "pos" in l:
            y += 1

        # if "neg" is in the line, decrease y by 1
        elif "neg" in l:
            y -= 1

        # append x to xar and y to yar
        xar.append(x)
        yar.append(y)

    # clear the subplot
    ax1.clear()

    # plot xar and yar on the subplot
    ax1.plot(xar, yar)

    # set the title of the plot
    plt.title("Sentiment Analysis for Narendra Modi")

# animate the graph, refreshing every second
ani = animation.FuncAnimation(fig, animate, interval=1000)

# show the graph
plt.show()
