Sooteemon committed on
Commit
34c2f86
·
verified ·
1 Parent(s): 6dead6e

Update sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. sentiment_analyzer.py +12 -6
sentiment_analyzer.py CHANGED
@@ -1,6 +1,7 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
  import torch
3
  import re
 
4
 
5
  class NewsAnalyzer:
6
  def __init__(self, model_name="google/gemma-2-2b-it"):
@@ -12,10 +13,20 @@ class NewsAnalyzer:
12
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
13
  print(f"Using device: {self.device}")
14
 
 
 
 
 
 
 
15
  try:
16
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
17
  self.model = AutoModelForCausalLM.from_pretrained(
18
  model_name,
 
19
  torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
20
  device_map="auto" if self.device == "cuda" else None,
21
  low_cpu_mem_usage=True
@@ -90,7 +101,6 @@ Reason: [Brief explanation of your analysis]"""
90
  def _parse_llm_analysis(self, response):
91
  """
92
  แยก sentiment, score, theme, impact และ explanation จาก LLM response
93
- (เวอร์ชันแก้ไขให้ทนทานต่อ Markdown และข้อผิดพลาด)
94
  """
95
  sentiment = "Neutral"
96
  score = 0.5
@@ -99,9 +109,6 @@ Reason: [Brief explanation of your analysis]"""
99
  explanation = "Unable to parse"
100
 
101
  try:
102
- # --- MODIFIED: Made Regex more robust ---
103
- # (Handles optional markdown "**" and optional colon ":")
104
-
105
  sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
106
  if sentiment_line:
107
  sentiment = sentiment_line.group(1).capitalize()
@@ -122,7 +129,6 @@ Reason: [Brief explanation of your analysis]"""
122
  reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
123
  if reason_match:
124
  explanation = reason_match.group(1).strip()
125
- # --- End of MODIFIED block ---
126
 
127
  if sentiment not in ["Positive", "Negative", "Neutral"]:
128
  sentiment = "Neutral"
 
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
  import torch
3
  import re
4
+ import os # --- ADDED ---
5
 
6
  class NewsAnalyzer:
7
  def __init__(self, model_name="google/gemma-2-2b-it"):
 
13
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
14
  print(f"Using device: {self.device}")
15
 
16
+ # --- ADDED: Get token from Space Secrets ---
17
+ hf_token = os.getenv("HF_TOKEN")
18
+
19
+ if not hf_token:
20
+ print("Warning: HF_TOKEN secret not found. May fail to load gated models.")
21
+
22
  try:
23
+ self.tokenizer = AutoTokenizer.from_pretrained(
24
+ model_name,
25
+ token=hf_token # --- ADDED ---
26
+ )
27
  self.model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
29
+ token=hf_token, # --- ADDED ---
30
  torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
31
  device_map="auto" if self.device == "cuda" else None,
32
  low_cpu_mem_usage=True
 
101
  def _parse_llm_analysis(self, response):
102
  """
103
  แยก sentiment, score, theme, impact และ explanation จาก LLM response
 
104
  """
105
  sentiment = "Neutral"
106
  score = 0.5
 
109
  explanation = "Unable to parse"
110
 
111
  try:
 
 
 
112
  sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
113
  if sentiment_line:
114
  sentiment = sentiment_line.group(1).capitalize()
 
129
  reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
130
  if reason_match:
131
  explanation = reason_match.group(1).strip()
 
132
 
133
  if sentiment not in ["Positive", "Negative", "Neutral"]:
134
  sentiment = "Neutral"