import time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
# Toy training corpus (demonstration only): six hand-written reviews and their
# sentiment labels (1 = positive, 0 = negative). Both lists are repeated 200
# times, giving 1,200 index-aligned entries each.
reviews = 200 * [
    "I love this product",
    "This is bad",
    "Excellent quality",
    "Not good",
    "Very happy",
    "Terrible experience",
]
labels = 200 * [1, 0, 1, 0, 1, 0]
# Build a TF-IDF -> logistic-regression pipeline and train it on the sample
# corpus. fit() hands back the fitted pipeline, so it is bound to `model`
# directly.
model = make_pipeline(
    TfidfVectorizer(),
    LogisticRegression(max_iter=1000),
).fit(reviews, labels)
# Evaluation set: six unseen reviews with their expected labels
# (1 = positive, 0 = negative), repeated 167 times so each list has
# 1,002 index-aligned entries.
test_reviews = 167 * [
    "I love it",
    "Worst ever",
    "Pretty good",
    "Not what I expected",
    "Fantastic",
    "Awful",
]
true_labels = 167 * [1, 0, 1, 0, 1, 0]
# Batch inference: classify the entire test set with a single predict() call.
# time.perf_counter() is used rather than time.time() because it is monotonic
# and offers the highest available timer resolution, which matters when the
# measured interval is short.
start_batch = time.perf_counter()
pred_batch = model.predict(test_reviews)
end_batch = time.perf_counter()
batch_time = end_batch - start_batch
batch_accuracy = accuracy_score(true_labels, pred_batch)
# Elapsed time of the one batched call, averaged over the number of reviews.
batch_avg_time = batch_time / len(test_reviews)
# Real-time inference: classify the reviews one at a time, issuing a separate
# predict() call per review, to measure per-request cost.
# time.perf_counter() is used rather than time.time() because it is monotonic
# and offers the highest available timer resolution.
start_real = time.perf_counter()
# predict() expects a collection of documents, so each review is wrapped in a
# one-element list and the single resulting prediction is unpacked with [0].
pred_real = [model.predict([review])[0] for review in test_reviews]
end_real = time.perf_counter()
real_time = end_real - start_real
real_accuracy = accuracy_score(true_labels, pred_real)
# Total elapsed time of all single-item calls, averaged over the reviews.
real_avg_time = real_time / len(test_reviews)
# Report accuracy (as a percentage) and mean per-review latency for both modes.
# Latency is printed with six decimal places: a per-review average below
# 0.00005 s would be rounded to "0.0000" by a four-decimal format, making the
# two modes impossible to compare.
print(f"Batch inference accuracy: {batch_accuracy*100:.2f}%, average time per review: {batch_avg_time:.6f} seconds")
print(f"Real-time inference accuracy: {real_accuracy*100:.2f}%, average time per review: {real_avg_time:.6f} seconds")