from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score, classification_report
# Small hand-labelled evaluation set.
# Each entry is a (review_text, gold_label) pair; gold_label is one of
# "pos", "neu" or "neg".
reviews = [
    ("I loved the movie, it was fantastic!", "pos"),
    ("The movie was okay, not great but not bad.", "neu"),
    ("I hated the movie, it was terrible.", "neg"),
    ("An excellent film with a great story.", "pos"),
    ("It was a dull movie, I almost fell asleep.", "neg"),
    ("Nothing special, just average.", "neu"),
]
analyzer = SentimentIntensityAnalyzer()

# Decision thresholds applied to VADER's compound score.
# VADER's conventional cut-offs are compound >= 0.05 -> positive and
# compound <= -0.05 -> negative, with everything in between neutral.
# A wider neutral band is used here so that only clearly polar text is
# labelled 'pos' or 'neg' (fewer false positives for those two classes).
POS_THRESHOLD = 0.2
NEG_THRESHOLD = -0.2

# Parallel lists: predictions[i] is the predicted label for the review whose
# gold label is true_labels[i].
predictions = []
true_labels = []
for text, label in reviews:
    compound = analyzer.polarity_scores(text)['compound']
    # Both thresholds are inclusive; scores strictly between them are neutral.
    if compound >= POS_THRESHOLD:
        pred = 'pos'
    elif compound <= NEG_THRESHOLD:
        pred = 'neg'
    else:
        pred = 'neu'
    predictions.append(pred)
    true_labels.append(label)

# Per-class precision, recall, F1 and support. `labels` fixes the row order,
# and zero_division=0 reports 0 for a metric whose denominator is zero
# (e.g. a class that is never predicted).
report = classification_report(
    true_labels,
    predictions,
    labels=['pos', 'neu', 'neg'],
    zero_division=0,
)
print(report)