# api.py — FastAPI sentiment-analysis service (fine-tuned DistilBERT).
# Provenance: Hugging Face upload by AbdoIR, revision fe5868d (verified).
from fastapi import FastAPI, UploadFile, File, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import pandas as pd
import torch
import tempfile
import os
import re
from collections import Counter
import datetime
# FastAPI application exposing the sentiment-analysis endpoints defined below.
app = FastAPI()
# Enable CORS
# NOTE(review): wildcard origins together with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm this combination
# is intended, or pin allow_origins to the real frontend origin(s).
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Load model
# Load the fine-tuned DistilBERT checkpoint and tokenizer from the local
# ./fine_tuned_model directory (must exist next to this file at startup).
model = DistilBertForSequenceClassification.from_pretrained("./fine_tuned_model")
tokenizer = DistilBertTokenizer.from_pretrained("./fine_tuned_model")
# Prefer GPU when available; inference-only, so gradients are disabled via eval().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
# Sentiment prediction
def predict_sentiment(texts):
    """Classify each text in *texts* as "Negative", "Neutral", or "Positive".

    Texts are tokenized in one padded batch (truncated to 128 tokens) and
    scored with the module-level fine-tuned DistilBERT model.
    """
    batch = tokenizer(texts, truncation=True, padding=True, max_length=128, return_tensors="pt")
    batch = {name: tensor.to(device) for name, tensor in batch.items()}
    with torch.no_grad():
        logits = model(**batch).logits
    # Class index -> human-readable label, matching the model's training labels.
    labels = ("Negative", "Neutral", "Positive")
    return [labels[idx] for idx in torch.argmax(logits, dim=1).tolist()]
# Frequent words
def get_top_words(texts, n=30):
    """Return a DataFrame of the *n* most frequent words across *texts*.

    Words are lowercased alphanumeric runs of at least 3 characters; the
    result has columns ['word', 'count'], ordered by descending frequency.
    """
    word_counts = Counter()
    for text in texts:
        word_counts.update(re.findall(r'\b\w{3,}\b', str(text).lower()))
    return pd.DataFrame(word_counts.most_common(n), columns=['word', 'count'])
# Identify column
def get_text_column(df):
    """Return the first recognized text-column name in *df*, or None.

    Candidates are checked in priority order: 'content', 'tweet', 'text'.
    """
    matches = [name for name in ('content', 'tweet', 'text') if name in df.columns]
    return matches[0] if matches else None
@app.api_route("/", methods=["GET", "HEAD"])
async def index(request: Request):
    """Health-check endpoint: report liveness plus a UTC timestamp.

    Fix: datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    datetime. Use an aware UTC datetime instead, and keep the response's
    original trailing-"Z" ISO-8601 format.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    return JSONResponse({
        "status": "ok",
        "message": "Server is alive",
        # isoformat() on an aware UTC datetime ends with "+00:00"; normalize
        # to the "Z" suffix this endpoint has always emitted.
        "timestamp": now.isoformat().replace("+00:00", "Z")
    })
# POST /predict
@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Run sentiment analysis over an uploaded CSV/Excel file.

    Annotates every row with a sentiment label and content length, writes
    the annotated table and a word-frequency table to a temp directory, and
    returns download links plus both tables inline as JSON records.
    """
    # CSV is attempted first; on failure, rewind the stream and try Excel.
    try:
        frame = pd.read_csv(file.file)
    except Exception:
        try:
            file.file.seek(0)
            frame = pd.read_excel(file.file)
        except Exception:
            raise HTTPException(status_code=400, detail="Unable to read the file")

    column = get_text_column(frame)
    if column is None:
        raise HTTPException(status_code=400, detail='No "content", "tweet", or "text" column found')

    contents = frame[column].astype(str).tolist()
    frame['sentiment'] = predict_sentiment(contents)
    frame['content_length'] = frame[column].astype(str).apply(len)
    word_table = get_top_words(contents)

    # Persist both result tables so GET /download can serve them later.
    out_dir = tempfile.mkdtemp()
    sentiment_path = os.path.join(out_dir, 'final_data.csv')
    words_path = os.path.join(out_dir, 'word_frequent.csv')
    frame.to_csv(sentiment_path, index=False)
    word_table.to_csv(words_path, index=False)

    return JSONResponse({
        'sentiment_file': f'/download?file={sentiment_path}',
        'top_words_file': f'/download?file={words_path}',
        'sentiment_data': frame.to_dict(orient='records'),
        'top_words_data': word_table.to_dict(orient='records')
    })
# POST /wordcloud
@app.post("/wordcloud")
async def wordcloud(file: UploadFile = File(...)):
    """Return the most frequent words from an uploaded CSV/Excel file."""
    # CSV is attempted first; on failure, rewind the stream and try Excel.
    try:
        frame = pd.read_csv(file.file)
    except Exception:
        try:
            file.file.seek(0)
            frame = pd.read_excel(file.file)
        except Exception:
            raise HTTPException(status_code=400, detail="Unable to read the file")

    column = get_text_column(frame)
    if column is None:
        raise HTTPException(status_code=400, detail='No "content", "tweet", or "text" column found')

    rows = frame[column].astype(str).tolist()
    return JSONResponse({'top_words_data': get_top_words(rows).to_dict(orient='records')})
# GET /download
@app.get("/download")
async def download(file: str):
    """Serve a result file previously written by POST /predict.

    Security fix: the original implementation served ANY readable path on
    disk (arbitrary file read, e.g. ?file=/etc/passwd). /predict only ever
    writes under tempfile.mkdtemp(), which lives inside the system temp
    directory — so restrict downloads to real files under that directory
    and resolve symlinks/".." before checking.

    Raises HTTP 404 for missing, empty, or out-of-bounds paths.
    """
    if not file:
        raise HTTPException(status_code=404, detail="File not found")
    resolved = os.path.realpath(file)
    temp_root = os.path.realpath(tempfile.gettempdir())
    # Reject anything that escapes the temp directory or is not a regular file.
    if not resolved.startswith(temp_root + os.sep) or not os.path.isfile(resolved):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(resolved, filename=os.path.basename(resolved))