Update sentiment_analyzer.py
Browse files
- sentiment_analyzer.py +12 -6
sentiment_analyzer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 2 |
import torch
|
| 3 |
import re
|
|
|
|
| 4 |
|
| 5 |
class NewsAnalyzer:
|
| 6 |
def __init__(self, model_name="google/gemma-2-2b-it"):
|
|
@@ -12,10 +13,20 @@ class NewsAnalyzer:
|
|
| 12 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
print(f"Using device: {self.device}")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
try:
|
| 16 |
-
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
|
|
|
|
|
|
| 17 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 18 |
model_name,
|
|
|
|
| 19 |
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
| 20 |
device_map="auto" if self.device == "cuda" else None,
|
| 21 |
low_cpu_mem_usage=True
|
|
@@ -90,7 +101,6 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 90 |
def _parse_llm_analysis(self, response):
|
| 91 |
"""
|
| 92 |
แยก sentiment, score, theme, impact และ explanation จาก LLM response
|
| 93 |
-
(เวอร์ชันแก้ไขให้ทนทานต่อ Markdown และข้อผิดพลาด)
|
| 94 |
"""
|
| 95 |
sentiment = "Neutral"
|
| 96 |
score = 0.5
|
|
@@ -99,9 +109,6 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 99 |
explanation = "Unable to parse"
|
| 100 |
|
| 101 |
try:
|
| 102 |
-
# --- MODIFIED: Made Regex more robust ---
|
| 103 |
-
# (Handles optional markdown "**" and optional colon ":")
|
| 104 |
-
|
| 105 |
sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
|
| 106 |
if sentiment_line:
|
| 107 |
sentiment = sentiment_line.group(1).capitalize()
|
|
@@ -122,7 +129,6 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 122 |
reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
|
| 123 |
if reason_match:
|
| 124 |
explanation = reason_match.group(1).strip()
|
| 125 |
-
# --- End of MODIFIED block ---
|
| 126 |
|
| 127 |
if sentiment not in ["Positive", "Negative", "Neutral"]:
|
| 128 |
sentiment = "Neutral"
|
|
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 2 |
import torch
|
| 3 |
import re
|
| 4 |
+
import os # --- ADDED ---
|
| 5 |
|
| 6 |
class NewsAnalyzer:
|
| 7 |
def __init__(self, model_name="google/gemma-2-2b-it"):
|
|
|
|
| 13 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 14 |
print(f"Using device: {self.device}")
|
| 15 |
|
| 16 |
+
# --- ADDED: Get token from Space Secrets ---
|
| 17 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 18 |
+
|
| 19 |
+
if not hf_token:
|
| 20 |
+
print("Warning: HF_TOKEN secret not found. May fail to load gated models.")
|
| 21 |
+
|
| 22 |
try:
|
| 23 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 24 |
+
model_name,
|
| 25 |
+
token=hf_token # --- ADDED ---
|
| 26 |
+
)
|
| 27 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 28 |
model_name,
|
| 29 |
+
token=hf_token, # --- ADDED ---
|
| 30 |
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
| 31 |
device_map="auto" if self.device == "cuda" else None,
|
| 32 |
low_cpu_mem_usage=True
|
|
|
|
| 101 |
def _parse_llm_analysis(self, response):
|
| 102 |
"""
|
| 103 |
แยก sentiment, score, theme, impact และ explanation จาก LLM response
|
|
|
|
| 104 |
"""
|
| 105 |
sentiment = "Neutral"
|
| 106 |
score = 0.5
|
|
|
|
| 109 |
explanation = "Unable to parse"
|
| 110 |
|
| 111 |
try:
|
|
|
|
|
|
|
|
|
|
| 112 |
sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
|
| 113 |
if sentiment_line:
|
| 114 |
sentiment = sentiment_line.group(1).capitalize()
|
|
|
|
| 129 |
reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
|
| 130 |
if reason_match:
|
| 131 |
explanation = reason_match.group(1).strip()
|
|
|
|
| 132 |
|
| 133 |
if sentiment not in ["Positive", "Negative", "Neutral"]:
|
| 134 |
sentiment = "Neutral"
|