AbdoIR
/

x-sentiment-analysis

Model card Files Files and versions

x-sentiment-analysis / api.py

AbdoIR's picture

Update api.py

b22f197 verified 7 months ago

history blame contribute delete

2.97 kB

	from flask import Flask, request, send_file, jsonify
	from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
	import pandas as pd
	import torch
	import tempfile
	import os
	import re
	from collections import Counter
	from flask_cors import CORS

	app = Flask(__name__)
	# NOTE(review): no origin restriction is configured on this CORS call.
	CORS(app)

	# Load model from Hugging Face Hub.
	# A Hub repo id has the form "namespace/repo_name"; files stored in a
	# directory inside the repo are addressed with `subfolder=` instead of
	# being appended to the id.
	model_repo = "AbdoIR/x-sentiment-analysis"
	model_subfolder = "fine_tuned_model"
	# Same name and value as before, for anything that reads `model_name`.
	model_name = f"{model_repo}/{model_subfolder}"

	if os.path.isdir(model_name):
	    # A local directory at that exact relative path: load it directly.
	    model = DistilBertForSequenceClassification.from_pretrained(model_name)
	    tokenizer = DistilBertTokenizer.from_pretrained(model_name)
	else:
	    model = DistilBertForSequenceClassification.from_pretrained(
	        model_repo, subfolder=model_subfolder
	    )
	    tokenizer = DistilBertTokenizer.from_pretrained(
	        model_repo, subfolder=model_subfolder
	    )

	# Run on the GPU when one is available; inference only, so switch the
	# model to eval mode once at start-up.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model.to(device)
	model.eval()

	# Predict sentiment
	def predict_sentiment(texts, batch_size=32):
	    """Classify each text as "Negative", "Neutral" or "Positive".

	    Uses the module-level ``tokenizer``, ``model`` and ``device``. Inputs are
	    truncated to 128 tokens and processed ``batch_size`` texts at a time so
	    that memory use does not grow with the size of the upload.

	    Args:
	        texts: A string or an iterable of strings.
	        batch_size: Number of texts sent through the model per forward pass.

	    Returns:
	        A list with one label per input text, in input order. Empty input
	        yields an empty list without touching the model.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """
	    if batch_size < 1:
	        raise ValueError("batch_size must be at least 1")

	    # A bare string is a single text, not an iterable of characters.
	    texts = [texts] if isinstance(texts, str) else list(texts)
	    if not texts:
	        return []

	    sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
	    labels = []
	    with torch.no_grad():
	        for start in range(0, len(texts), batch_size):
	            batch = texts[start:start + batch_size]
	            encodings = tokenizer(
	                batch,
	                truncation=True,
	                padding=True,
	                max_length=128,
	                return_tensors="pt",
	            )
	            encodings = {key: val.to(device) for key, val in encodings.items()}
	            logits = model(**encodings).logits
	            class_ids = torch.argmax(logits, dim=1).tolist()
	            labels.extend(sentiment_map[class_id] for class_id in class_ids)
	    return labels

	# Top frequent words
	# Words are runs of at least three word characters (letters, digits, "_").
	_WORD_PATTERN = re.compile(r'\b\w{3,}\b')


	def get_top_words(texts, n=30):
	    """Return the ``n`` most frequent words across ``texts``.

	    Each item is converted with ``str()`` and lower-cased before matching, so
	    non-string items are counted by their string form. Tokens are fed
	    straight into the counter instead of being collected in a list first.

	    Args:
	        texts: Iterable of texts (any objects; ``str()`` is applied).
	        n: Maximum number of rows to return.

	    Returns:
	        A DataFrame with columns ``word`` and ``count``, most frequent first.
	    """
	    counter = Counter()
	    for text in texts:
	        counter.update(_WORD_PATTERN.findall(str(text).lower()))
	    return pd.DataFrame(counter.most_common(n), columns=['word', 'count'])

	def _read_uploaded_table(file):
	    """Parse an upload as CSV, falling back to Excel; return None if both fail."""
	    # The format is detected by trial, so any failure of one reader simply
	    # means "try the next one" rather than an error worth surfacing.
	    try:
	        return pd.read_csv(file)
	    except Exception:
	        try:
	            file.seek(0)  # read_csv may have consumed part of the stream
	            return pd.read_excel(file)
	        except Exception:
	            return None


	# POST /predict
	@app.route('/predict', methods=['POST'])
	def predict():
	    """Label every row of an uploaded CSV/Excel file with a sentiment.

	    Expects a multipart upload under the form field ``file`` containing a
	    ``content`` or ``tweet`` column (``content`` wins when both exist).

	    Returns:
	        JSON with download links for the two generated CSV files plus the
	        labelled rows and the top-words table inline. Responds with HTTP 400
	        and an ``error`` message when the upload is missing, unreadable, or
	        has no usable text column.
	    """
	    from urllib.parse import quote

	    if 'file' not in request.files:
	        return jsonify({'error': 'No file uploaded'}), 400

	    df = _read_uploaded_table(request.files['file'])
	    if df is None:
	        return jsonify({'error': 'Unable to read the file'}), 400

	    text_col = next((col for col in ('content', 'tweet') if col in df.columns), None)
	    if text_col is None:
	        return jsonify({'error': 'No "content" or "tweet" column found'}), 400

	    # Missing cells become empty text; without this they turn into the
	    # literal string "nan", which is then classified and counted as a word.
	    texts = df[text_col].fillna('').astype(str).tolist()
	    # Skip the model entirely for a table with no rows.
	    df['sentiment'] = predict_sentiment(texts) if texts else []

	    top_words_df = get_top_words(texts)

	    # NOTE(review): this directory is never removed; files accumulate until
	    # something outside this function cleans the temp location.
	    temp_dir = tempfile.mkdtemp()

	    sentiment_path = os.path.join(temp_dir, 'final_data.csv')
	    df.to_csv(sentiment_path, index=False)

	    words_path = os.path.join(temp_dir, 'word_frequent.csv')
	    top_words_df.to_csv(words_path, index=False)

	    # NaN/NaT would be emitted as bare NaN, which is not valid JSON;
	    # send null for missing values instead.
	    sentiment_records = (
	        df.astype(object).where(df.notna(), None).to_dict(orient='records')
	    )

	    return jsonify({
	        # Percent-encode the path so spaces or reserved characters in the
	        # temp location cannot corrupt the query string.
	        'sentiment_file': f'/download?file={quote(sentiment_path)}',
	        'top_words_file': f'/download?file={quote(words_path)}',
	        'sentiment_data': sentiment_records,
	        'top_words_data': top_words_df.to_dict(orient='records'),
	    })

	# GET /download
	@app.route('/download')
	def download():
	    """Send one of the CSV files generated by ``/predict`` as an attachment.

	    The ``file`` query parameter is untrusted, so it is resolved and only
	    served when it is a regular file named ``final_data.csv`` or
	    ``word_frequent.csv`` located inside the system temp directory (where
	    ``tempfile.mkdtemp()`` creates its folders). Anything else gets the same
	    404 response, so the endpoint cannot be used to read arbitrary files.
	    """
	    not_found = (jsonify({'error': 'File not found'}), 404)

	    file_path = request.args.get('file')
	    if not file_path:
	        return not_found

	    try:
	        # realpath collapses ".." segments and symlinks before the checks.
	        resolved = os.path.realpath(file_path)
	        temp_root = os.path.realpath(tempfile.gettempdir())
	    except (OSError, ValueError):
	        # e.g. an embedded NUL byte in the supplied path
	        return not_found

	    allowed_names = {'final_data.csv', 'word_frequent.csv'}
	    inside_temp = os.path.normcase(resolved).startswith(
	        os.path.normcase(temp_root) + os.sep
	    )
	    if (
	        not inside_temp
	        or os.path.basename(resolved) not in allowed_names
	        or not os.path.isfile(resolved)
	    ):
	        return not_found

	    return send_file(resolved, as_attachment=True)

	if __name__ == '__main__':
	    # The interactive debugger lets anyone who can reach the server run
	    # code on it, and this server listens on every interface. Keep debug
	    # mode off unless it is explicitly requested via FLASK_DEBUG.
	    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
	        '1', 'true', 'yes', 'on'
	    )
	    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)