"""Flask service that labels uploaded tweets/posts with a sentiment.

Endpoints:
    POST /predict   -- accepts a CSV or Excel upload (form field ``file``)
                       containing a ``content`` or ``tweet`` column, returns
                       the per-row sentiment and the most frequent words, and
                       stores both tables as CSV files for later download.
    GET  /download  -- returns one of the CSV files produced by ``/predict``.
"""

import os
import re
import tempfile
from collections import Counter
from urllib.parse import quote

import pandas as pd
import torch
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

app = Flask(__name__)
CORS(app)

# Load model from Hugging Face Hub
model_name = "AbdoIR/x-sentiment-analysis/fine_tuned_model"


def _split_model_reference(name):
    """Split ``namespace/repo/sub/dir`` into ``(repo_id, subfolder)``.

    Hub repository ids have at most two segments (``namespace/name``); any
    further segments are treated as a folder inside that repository.  A name
    that is an existing local directory is returned unchanged so local
    checkpoints keep working.
    """
    if os.path.isdir(name):
        return name, ""
    segments = name.split("/")
    if len(segments) <= 2:
        return name, ""
    return "/".join(segments[:2]), "/".join(segments[2:])


_model_source, _model_subfolder = _split_model_reference(model_name)
# Only pass ``subfolder`` when there is one, so the default loading path is
# untouched for plain repo ids and local directories.
_load_kwargs = {"subfolder": _model_subfolder} if _model_subfolder else {}

model = DistilBertForSequenceClassification.from_pretrained(_model_source, **_load_kwargs)
tokenizer = DistilBertTokenizer.from_pretrained(_model_source, **_load_kwargs)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Class index produced by the classifier head -> human readable label.
SENTIMENT_LABELS = {0: "Negative", 1: "Neutral", 2: "Positive"}

# Words of three or more "word" characters.
_WORD_PATTERN = re.compile(r'\b\w{3,}\b')

# Every file produced by /predict lives below this directory, and /download
# refuses to serve anything outside of it.  The location is deterministic so
# that several worker processes agree on it.
# NOTE: result directories are never deleted by this service; clean this
# directory periodically (e.g. with a cron job) on long-running deployments.
RESULTS_ROOT = os.path.realpath(
    os.path.join(tempfile.gettempdir(), "x_sentiment_results")
)
os.makedirs(RESULTS_ROOT, mode=0o700, exist_ok=True)


# Predict sentiment
def predict_sentiment(texts, batch_size=32):
    """Return the sentiment label for every string in *texts*.

    Args:
        texts: Iterable of strings to classify.
        batch_size: Number of texts sent through the model per forward pass.
            Batching keeps memory use bounded for large uploads.

    Returns:
        A list with one of ``"Negative"``, ``"Neutral"`` or ``"Positive"``
        per input text, in input order.  An empty input yields ``[]``.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(texts)
    labels = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        encodings = tokenizer(
            batch,
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors="pt",
        )
        encodings = {key: val.to(device) for key, val in encodings.items()}
        with torch.no_grad():
            logits = model(**encodings).logits
        class_ids = torch.argmax(logits, dim=1).tolist()
        labels.extend(SENTIMENT_LABELS[class_id] for class_id in class_ids)
    return labels


# Top frequent words
def get_top_words(texts, n=30):
    """Return the *n* most frequent words across *texts*.

    Texts are lower-cased and only words with at least three characters are
    counted.

    Returns:
        A DataFrame with the columns ``word`` and ``count``, most frequent
        word first.
    """
    counter = Counter()
    for text in texts:
        counter.update(_WORD_PATTERN.findall(str(text).lower()))
    return pd.DataFrame(counter.most_common(n), columns=['word', 'count'])


def _read_table(upload):
    """Parse an uploaded CSV or Excel file; return ``None`` if neither works."""
    readers = [pd.read_csv, pd.read_excel]
    filename = (upload.filename or "").lower()
    if filename.endswith((".xls", ".xlsx")):
        # Try the parser matching the extension first.
        readers.reverse()
    for reader in readers:
        try:
            upload.seek(0)
            return reader(upload)
        except Exception:
            # pandas raises many unrelated error types for unreadable input;
            # falling through to the next parser is the intended behaviour.
            continue
    return None


def _is_missing(value):
    """Return True for NaN / NaT / NA scalars, which JSON cannot represent."""
    if value is pd.NaT or value is pd.NA:
        return True
    return isinstance(value, float) and value != value


def _json_records(frame):
    """Convert *frame* to a list of row dicts with missing values as None.

    ``jsonify`` would otherwise emit bare ``NaN`` tokens, which are not valid
    JSON and break strict parsers such as ``JSON.parse`` in browsers.
    """
    return [
        {key: (None if _is_missing(value) else value) for key, value in row.items()}
        for row in frame.to_dict(orient='records')
    ]


def _is_inside_results_root(path):
    """Return True if the already-resolved *path* lies below RESULTS_ROOT."""
    try:
        return (
            path != RESULTS_ROOT
            and os.path.commonpath([path, RESULTS_ROOT]) == RESULTS_ROOT
        )
    except ValueError:
        # Raised e.g. for paths on a different drive on Windows.
        return False


# POST /predict
@app.route('/predict', methods=['POST'])
def predict():
    """Classify every row of the uploaded table and report frequent words.

    Responds with HTTP 400 when no file is uploaded, the file cannot be
    parsed, or it has neither a ``content`` nor a ``tweet`` column.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    df = _read_table(request.files['file'])
    if df is None:
        return jsonify({'error': 'Unable to read the file'}), 400

    if 'content' in df.columns:
        text_col = 'content'
    elif 'tweet' in df.columns:
        text_col = 'tweet'
    else:
        return jsonify({'error': 'No "content" or "tweet" column found'}), 400

    # Empty cells become "" rather than the literal string "nan", which would
    # otherwise be classified and show up in the word counts.
    texts = df[text_col].fillna("").astype(str).tolist()
    df['sentiment'] = predict_sentiment(texts)
    top_words_df = get_top_words(texts)

    # One private directory per request, always below RESULTS_ROOT.
    temp_dir = tempfile.mkdtemp(dir=RESULTS_ROOT)
    sentiment_path = os.path.join(temp_dir, 'final_data.csv')
    df.to_csv(sentiment_path, index=False)
    words_path = os.path.join(temp_dir, 'word_frequent.csv')
    top_words_df.to_csv(words_path, index=False)

    return jsonify({
        'sentiment_file': f'/download?file={quote(sentiment_path, safe="/")}',
        'top_words_file': f'/download?file={quote(words_path, safe="/")}',
        'sentiment_data': _json_records(df),
        'top_words_data': _json_records(top_words_df),
    })


# GET /download
@app.route('/download')
def download():
    """Send a file previously produced by ``/predict``.

    The ``file`` query parameter is client-controlled, so it is resolved
    (following symlinks and ``..``) and only served when it is a regular file
    below RESULTS_ROOT.  Anything else gets the same 404 response.
    """
    file_path = request.args.get('file')
    if not file_path:
        return jsonify({'error': 'File not found'}), 404

    resolved = os.path.realpath(file_path)
    if not _is_inside_results_root(resolved) or not os.path.isfile(resolved):
        return jsonify({'error': 'File not found'}), 404

    return send_file(resolved, as_attachment=True)


if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, so it must
    # never be enabled by default on a server bound to all interfaces.
    # Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)