| | """ |
| | Gradio App for HF Space Deployment |
| | Comment Classification Skill powered by Qwen2.5-1.5B fine-tuned model. |
| | """ |
| |
|
| | import os |
| | import json |
| | import time |
| | import torch |
| | import gradio as gr |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| |
|
| | |
| | |
| | |
# HF Hub repo (or local path) of the fine-tuned classifier; overridable via env var.
MODEL_ID = os.environ.get("MODEL_ID", "jovincia/qwen25-comment-classifier")

print(f"Loading model from {MODEL_ID}...")
try:
    # trust_remote_code=True: Qwen models may ship custom tokenizer/model code.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # full precision — CPU Spaces gain nothing from fp16
        trust_remote_code=True,
    )
    model.eval()  # disable dropout etc. for deterministic inference
    # Index -> class-name mapping saved in the model config at training time.
    # NOTE(review): keys may be int or str depending on how the config was loaded;
    # classify_comment() below handles both.
    id2label = model.config.id2label
    print(f"Model loaded. Labels: {id2label}")
except Exception as e:
    # Fail fast at startup: a Space without a model is useless, so exit non-zero
    # rather than serving a broken UI.
    print(f"ERROR: Failed to load model from '{MODEL_ID}': {e}")
    print("Make sure the model has been trained (02_finetune.py) or the HF repo exists.")
    raise SystemExit(1)
| |
|
| | |
# Hex accent color for each class label. NOTE(review): not referenced anywhere
# else in this file — presumably intended for custom UI styling; confirm before
# removing.
LABEL_COLORS = dict(
    positive="#4CAF50",
    negative="#F44336",
    neutral="#9E9E9E",
    ambiguous="#FF9800",
)
| |
|
def classify_comment(text: str) -> dict:
    """Classify a single comment into the model's sentiment classes.

    Args:
        text: The comment to classify. Blank or whitespace-only input
            short-circuits to all-zero probabilities without running the model.

    Returns:
        Mapping of class label -> probability rounded to 4 decimals, in the
        shape expected by a ``gr.Label`` component.
    """
    if not text or not text.strip():
        return {label: 0.0 for label in id2label.values()}

    start_time = time.perf_counter()

    # Single-sequence call, so no padding is needed (padding=True would be a
    # no-op here anyway); just clip long comments to the training context.
    inputs = tokenizer(
        text,
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )

    # Inference only — skip gradient bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)[0]

    latency_ms = (time.perf_counter() - start_time) * 1000

    # id2label keys are ints when the config is built in-process but strings
    # when round-tripped through JSON; try both before synthesizing a name.
    result = {}
    for idx, prob in enumerate(probs.tolist()):
        label = id2label.get(idx, id2label.get(str(idx), f"class_{idx}"))
        result[label] = round(prob, 4)

    # Log top prediction + latency to stdout for Space-side debugging.
    predicted = max(result, key=result.get)
    print(f"[{latency_ms:.1f}ms] '{text[:50]}...' -> {predicted} ({result[predicted]:.3f})")

    return result
| |
|
| |
|
def batch_classify(texts: str) -> str:
    """Classify several comments at once, one per line of *texts*.

    Returns one result line per non-blank input line, formatted as
    ``[LABEL] (confidence%) original comment``.
    """
    if not texts or not texts.strip():
        return "Please enter at least one comment."

    formatted = []
    for raw in texts.strip().split("\n"):
        comment = raw.strip()
        if not comment:
            continue  # skip blank lines between comments
        scores = classify_comment(comment)
        top = max(scores, key=scores.get)
        formatted.append(f"[{top.upper()}] ({scores[top]:.1%}) {comment}")

    return "\n".join(formatted)
| |
|
| |
|
| | |
| | |
| | |
# Build the Gradio UI. `demo` stays module-level so HF Spaces can discover it.
with gr.Blocks(
    title="Comment Classification Skill",
    theme=gr.themes.Soft(),
) as demo:
    # Header shown above both tabs.
    gr.Markdown(
        """
        # Comment Classification Skill
        **Fine-tuned Qwen2.5-1.5B** for 4-class comment sentiment classification.

        Classes: **positive** | **negative** | **neutral** | **ambiguous**
        """
    )

    # Tab 1: classify a single comment and show per-class probabilities.
    with gr.Tab("Single Comment"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Enter a comment",
                    placeholder="Type your comment here...",
                    lines=3,
                )
                classify_btn = gr.Button("Classify", variant="primary")
            with gr.Column():
                # gr.Label renders the dict returned by classify_comment as
                # a ranked bar display; show all 4 classes.
                output_label = gr.Label(label="Classification Result", num_top_classes=4)

        classify_btn.click(
            fn=classify_comment,
            inputs=input_text,
            outputs=output_label,
        )

        # Click-to-fill sample comments covering the four classes.
        gr.Examples(
            examples=[
                ["This product is amazing! Best purchase I've ever made."],
                ["Terrible quality. Broke after one day of use."],
                ["It arrived on time. Standard packaging."],
                ["I'm not sure if this is working correctly or not..."],
                ["The customer service was incredibly helpful and kind!"],
                ["What a waste of money. Never buying from here again."],
            ],
            inputs=input_text,
        )

    # Tab 2: newline-separated batch classification.
    with gr.Tab("Batch Classification"):
        gr.Markdown("Enter one comment per line for batch processing.")
        batch_input = gr.Textbox(
            label="Comments (one per line)",
            placeholder="Comment 1\nComment 2\nComment 3",
            lines=8,
        )
        batch_btn = gr.Button("Classify All", variant="primary")
        # Results are read-only text, one formatted line per input comment.
        batch_output = gr.Textbox(label="Results", lines=10, interactive=False)

        batch_btn.click(
            fn=batch_classify,
            inputs=batch_input,
            outputs=batch_output,
        )

    # Footer with model/task provenance.
    gr.Markdown(
        """
        ---
        **Model:** Qwen2.5-1.5B fine-tuned with LoRA on GoEmotions dataset (58k+ comments)
        **Task:** 4-class comment sentiment classification
        """
    )
| |
|
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
| |
|