|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
|
import torch |
|
|
import re |
|
|
|
|
|
class SentimentAnalyzer:
    """Financial-news sentiment analyzer backed by a causal LLM.

    Prompts an instruction-tuned Hugging Face model (Gemma by default) to
    rate text as Positive/Negative/Neutral with a confidence score, and
    falls back to a DistilBERT sentiment pipeline whenever the primary
    model is unavailable or generation fails.
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize the sentiment analyzer with a Gemma model.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                instead of a 3-4b variant, which is not available yet).
        """
        print(f"Loading model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Always define the fallback-pipeline slot; it is built lazily in
        # _fallback_sentiment. (Previously it was only created on the
        # load-failure path, so a generation error after a successful load
        # raised AttributeError inside the fallback.)
        self.sentiment_pipeline = None

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 only on GPU; CPU inference stays in fp32.
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )

            # device_map="auto" already places the model on GPU; only a
            # CPU load needs an explicit move.
            if self.device == "cpu":
                self.model = self.model.to(self.device)

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {e}")
            # Leave both handles unset so analyze_sentiment routes every
            # request through the fallback path.
            self.tokenizer = None
            self.model = None

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: The text to analyze.

        Returns:
            dict: {"sentiment": str, "score": float, "explanation": str}
        """
        # Guard: nothing to analyze.
        if not text or len(text.strip()) == 0:
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        # Primary model never loaded — go straight to the fallback.
        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).

News: {text[:500]}

Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens. Decoding the full
            # sequence echoed the prompt's own template lines
            # ("Sentiment: [Positive/Negative/Neutral]") into the text
            # handed to the parser.
            new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)

            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from an LLM response.

        Defaults to Neutral / 0.5 / "Unable to analyze" for any field the
        response does not supply in the expected format.
        """
        sentiment = "Neutral"
        score = 0.5
        explanation = "Unable to analyze"

        try:
            if "Sentiment:" in response:
                sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
                if sentiment_line:
                    sentiment = sentiment_line.group(1).capitalize()

            if "Score:" in response:
                score_line = re.search(r'Score:\s*([\d.]+)', response)
                if score_line:
                    score = float(score_line.group(1))
                    # Clamp out-of-range model output into [0, 1].
                    score = max(0.0, min(1.0, score))

            if "Reason:" in response:
                reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
                if reason_match:
                    explanation = reason_match.group(1).strip()

            # If the captured word is not a valid label (e.g. "Sentiment:
            # very positive"), fall back to keyword spotting over the
            # whole response.
            if sentiment not in ["Positive", "Negative", "Neutral"]:
                if "positive" in response.lower():
                    sentiment = "Positive"
                elif "negative" in response.lower():
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"

        except Exception as e:
            print(f"Parse error: {e}")

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback analysis using a DistilBERT sentiment pipeline.

        The pipeline is created on first use and cached on the instance,
        so a successful primary-model setup never pays its download cost.
        """
        try:
            if self.sentiment_pipeline is None:
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="distilbert-base-uncased-finetuned-sst-2-english"
                )

            result = self.sentiment_pipeline(text[:512])[0]

            # SST-2 fine-tune emits only POSITIVE/NEGATIVE labels.
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']

            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception as e:
            # Narrowed from a bare except (which also swallowed
            # SystemExit/KeyboardInterrupt) and now logs the cause.
            print(f"Fallback analysis failed: {e}")
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze the sentiment of several news items at once.

        Args:
            news_list: list of dicts, each with "title" and "summary" keys.

        Returns:
            list: one dict per item — the original fields merged with the
                sentiment result (sentiment, score, explanation).
        """
        results = []

        for news in news_list:
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"

            sentiment_result = self.analyze_sentiment(combined_text)

            results.append({
                **news,
                **sentiment_result
            })

        return results