Sooteemon committed on
Commit
57b6068
·
verified ·
1 Parent(s): 34c2f86

Update sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. sentiment_analyzer.py +64 -130
sentiment_analyzer.py CHANGED
@@ -1,162 +1,94 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
  import torch
3
  import re
4
- import os # --- ADDED ---
5
 
6
  class NewsAnalyzer:
7
- def __init__(self, model_name="google/gemma-2-2b-it"):
8
  """
9
- Initialize news analyzer with Gemma model
10
  """
11
- print(f"Loading model: {model_name}")
12
-
13
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
14
- print(f"Using device: {self.device}")
15
-
16
- # --- ADDED: Get token from Space Secrets ---
17
- hf_token = os.getenv("HF_TOKEN")
18
 
19
- if not hf_token:
20
- print("Warning: HF_TOKEN secret not found. May fail to load gated models.")
21
 
22
  try:
23
- self.tokenizer = AutoTokenizer.from_pretrained(
24
- model_name,
25
- token=hf_token # --- ADDED ---
26
- )
27
- self.model = AutoModelForCausalLM.from_pretrained(
28
- model_name,
29
- token=hf_token, # --- ADDED ---
30
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
31
- device_map="auto" if self.device == "cuda" else None,
32
- low_cpu_mem_usage=True
33
- )
34
-
35
- if self.device == "cpu":
36
- self.model = self.model.to(self.device)
37
-
38
- print("Model loaded successfully!")
39
-
40
- except Exception as e:
41
- print(f"Error loading model: {e}")
42
- self.model = None
43
  self.sentiment_pipeline = pipeline(
44
  "sentiment-analysis",
45
- model="distilbert-base-uncased-finetuned-sst-2-english"
 
46
  )
47
 
48
- def analyze_news_item(self, text):
49
- """
50
- วิเคราะห์ข่าว (Sentiment, Theme, Impact)
51
- """
52
- if not text or len(text.strip()) == 0:
53
- return {
54
- "sentiment": "Neutral", "score": 0.5, "theme": "Other",
55
- "impact": "Neutral", "explanation": "No text to analyze"
56
- }
57
-
58
- if self.model is None:
59
- return self._fallback_sentiment(text)
60
-
61
- try:
62
- prompt = f"""Analyze this financial news article. Provide your analysis in the *exact* format specified below.
63
-
64
- **Categories to use:**
65
- - **Theme:** [Choose one: Earnings/Finance, Product/Service, Legal/Regulation, Management/M&A, Market/Economy, Other]
66
- - **Impact:** [Choose one: Opportunity, Risk, Neutral]
67
- - **Sentiment:** [Choose one: Positive, Negative, Neutral]
68
-
69
- **News Article:**
70
- {text[:500]}
71
-
72
- **Your Analysis (Use this *exact* format):**
73
- Sentiment: [Positive/Negative/Neutral]
74
- Score: [0.0-1.0 confidence score]
75
- Theme: [Selected Theme]
76
- Impact: [Selected Impact]
77
- Reason: [Brief explanation of your analysis]"""
78
 
79
- inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
80
- inputs = inputs.to(self.device)
81
- prompt_length = inputs['input_ids'].shape[1]
82
 
83
- with torch.no_grad():
84
- outputs = self.model.generate(
85
- **inputs,
86
- max_new_tokens=200,
87
- temperature=0.3,
88
- do_sample=True,
89
- pad_token_id=self.tokenizer.eos_token_id
90
- )
91
-
92
- new_tokens = outputs[0][prompt_length:]
93
- response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
94
-
95
- return self._parse_llm_analysis(response)
96
 
97
  except Exception as e:
98
- print(f"Error in analysis: {e}")
99
- return self._fallback_sentiment(text)
 
100
 
101
- def _parse_llm_analysis(self, response):
102
  """
103
- แยก sentiment, score, theme, impact และ explanation จาก LLM response
104
  """
105
- sentiment = "Neutral"
106
- score = 0.5
107
- theme = "Other"
108
- impact = "Neutral"
109
- explanation = "Unable to parse"
110
 
111
  try:
112
- sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
113
- if sentiment_line:
114
- sentiment = sentiment_line.group(1).capitalize()
 
115
 
116
- score_line = re.search(r'\**Score:?\**\s*([\d.]+)', response)
117
- if score_line:
118
- score = float(score_line.group(1))
119
- score = max(0.0, min(1.0, score))
120
-
121
- theme_line = re.search(r'\**Theme:?\**\s*([\w\/ -]+)', response, re.IGNORECASE)
122
- if theme_line:
123
- theme = theme_line.group(1).strip()
124
-
125
- impact_line = re.search(r'\**Impact:?\**\s*(\w+)', response, re.IGNORECASE)
126
- if impact_line:
127
- impact = impact_line.group(1).capitalize().strip()
128
-
129
- reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
130
- if reason_match:
131
- explanation = reason_match.group(1).strip()
132
-
133
- if sentiment not in ["Positive", "Negative", "Neutral"]:
134
- sentiment = "Neutral"
135
- if impact not in ["Opportunity", "Risk", "Neutral"]:
136
- impact = "Neutral"
137
 
138
- except Exception as e:
139
- print(f"Parse error: {e}. Response was: {response}")
 
 
 
 
140
 
141
- return {
142
- "sentiment": sentiment, "score": score, "theme": theme,
143
- "impact": impact, "explanation": explanation
144
- }
145
 
146
- def _fallback_sentiment(self, text):
147
- """Fallback method ใช้ DistilBERT"""
148
- try:
149
- result = self.sentiment_pipeline(text[:512])[0]
150
- sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
151
- score = result['score']
152
  return {
153
- "sentiment": sentiment, "score": score, "theme": "N/A",
154
- "impact": "N/A", "explanation": f"Analyzed using fallback model"
 
 
 
155
  }
156
- except:
 
 
157
  return {
158
  "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
159
- "impact": "N/A", "explanation": "Analysis unavailable"
160
  }
161
 
162
  def analyze_batch(self, news_list):
@@ -171,4 +103,6 @@ Reason: [Brief explanation of your analysis]"""
171
  **news,
172
  **sentiment_result
173
  })
174
- return results
 
 
 
1
+ from transformers import pipeline
2
  import torch
3
  import re
 
4
 
5
  class NewsAnalyzer:
6
+ def __init__(self, model_name=None): # Model_name is no longer needed
7
  """
8
+ Initialize news analyzer with fast, CPU-friendly Zero-Shot pipelines
9
  """
10
+ print("Initializing Zero-Shot News Analyzer...")
 
 
 
 
 
 
11
 
12
+ self.device = 0 if torch.cuda.is_available() else -1 # Use 0 for GPU, -1 for CPU
13
+ print(f"Using device: {'cuda' if self.device == 0 else 'cpu'}")
14
 
15
  try:
16
+ # Pipeline 1: For Sentiment Analysis
17
+ print("Loading Sentiment model...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  self.sentiment_pipeline = pipeline(
19
  "sentiment-analysis",
20
+ model="distilbert-base-uncased-finetuned-sst-2-english",
21
+ device=self.device
22
  )
23
 
24
+ # Pipeline 2: For Zero-Shot Classification (Theme & Impact)
25
+ print("Loading Zero-Shot model...")
26
+ self.classifier_pipeline = pipeline(
27
+ "zero-shot-classification",
28
+ model="Moritz/bart-large-mnli-fever-anli-ling-wanli",
29
+ device=self.device
30
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ print("Models loaded successfully!")
 
 
33
 
34
+ # Define the labels for classification
35
+ self.theme_labels = [
36
+ "Earnings/Finance", "Product/Service", "Legal/Regulation",
37
+ "Management/M&A", "Market/Economy", "Other"
38
+ ]
39
+ self.impact_labels = ["Opportunity", "Risk", "Neutral"]
 
 
 
 
 
 
 
40
 
41
  except Exception as e:
42
+ print(f"Fatal error loading models: {e}")
43
+ self.sentiment_pipeline = None
44
+ self.classifier_pipeline = None
45
 
46
+ def analyze_news_item(self, text):
47
  """
48
+ วิเคราะห์ข่าว (Sentiment, Theme, Impact) โดยใช้ Zero-Shot
49
  """
50
+ if not text or len(text.strip()) == 0 or not self.classifier_pipeline:
51
+ return {
52
+ "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
53
+ "impact": "N/A", "explanation": "No text or model"
54
+ }
55
 
56
  try:
57
+ # 1. Analyze Sentiment
58
+ sentiment_result = self.sentiment_pipeline(text[:512])[0] # Truncate for speed
59
+ sentiment = sentiment_result['label'].capitalize()
60
+ score = sentiment_result['score']
61
 
62
+ # 2. Analyze Theme
63
+ theme_result = self.classifier_pipeline(
64
+ text[:512],
65
+ candidate_labels=self.theme_labels
66
+ )
67
+ theme = theme_result['labels'][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ # 3. Analyze Impact
70
+ impact_result = self.classifier_pipeline(
71
+ text[:512],
72
+ candidate_labels=self.impact_labels
73
+ )
74
+ impact = impact_result['labels'][0]
75
 
76
+ # 4. Create an explanation
77
+ explanation = f"Classified as '{theme}' (Impact: {impact}) via zero-shot analysis."
 
 
78
 
 
 
 
 
 
 
79
  return {
80
+ "sentiment": "Positive" if sentiment == "Positive" else "Negative", # Simple conversion
81
+ "score": score,
82
+ "theme": theme,
83
+ "impact": impact,
84
+ "explanation": explanation
85
  }
86
+
87
+ except Exception as e:
88
+ print(f"Error in analysis: {e}")
89
  return {
90
  "sentiment": "Neutral", "score": 0.5, "theme": "N/A",
91
+ "impact": "N/A", "explanation": "Analysis failed"
92
  }
93
 
94
  def analyze_batch(self, news_list):
 
103
  **news,
104
  **sentiment_result
105
  })
106
+ return results
107
+
108
+ # --- ไม่ต้องใช้ฟังก์ชัน _parse หรือ _fallback อีกต่อไป ---