Update sentiment_analyzer.py
Browse files
- sentiment_analyzer.py +14 -12
sentiment_analyzer.py
CHANGED
|
@@ -67,8 +67,6 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 67 |
|
| 68 |
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
|
| 69 |
inputs = inputs.to(self.device)
|
| 70 |
-
|
| 71 |
-
# --- MODIFIED: Get prompt length to slice output correctly ---
|
| 72 |
prompt_length = inputs['input_ids'].shape[1]
|
| 73 |
|
| 74 |
with torch.no_grad():
|
|
@@ -80,7 +78,6 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 80 |
pad_token_id=self.tokenizer.eos_token_id
|
| 81 |
)
|
| 82 |
|
| 83 |
-
# --- MODIFIED: Decode *only* the new tokens, not the prompt ---
|
| 84 |
new_tokens = outputs[0][prompt_length:]
|
| 85 |
response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
|
| 86 |
|
|
@@ -91,36 +88,41 @@ Reason: [Brief explanation of your analysis]"""
|
|
| 91 |
return self._fallback_sentiment(text)
|
| 92 |
|
| 93 |
def _parse_llm_analysis(self, response):
|
| 94 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 95 |
sentiment = "Neutral"
|
| 96 |
score = 0.5
|
| 97 |
theme = "Other"
|
| 98 |
impact = "Neutral"
|
| 99 |
-
explanation = "Unable to parse"
|
| 100 |
|
| 101 |
try:
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
if sentiment_line:
|
| 104 |
sentiment = sentiment_line.group(1).capitalize()
|
| 105 |
|
| 106 |
-
score_line = re.search(r'Score
|
| 107 |
if score_line:
|
| 108 |
score = float(score_line.group(1))
|
| 109 |
score = max(0.0, min(1.0, score))
|
| 110 |
|
| 111 |
-
theme_line = re.search(r'Theme
|
| 112 |
if theme_line:
|
| 113 |
theme = theme_line.group(1).strip()
|
| 114 |
|
| 115 |
-
impact_line = re.search(r'Impact
|
| 116 |
if impact_line:
|
| 117 |
impact = impact_line.group(1).capitalize().strip()
|
| 118 |
|
| 119 |
-
|
| 120 |
-
reason_match = re.search(r'Reason:\s*(.*)', response, re.DOTALL | re.IGNORECASE)
|
| 121 |
if reason_match:
|
| 122 |
explanation = reason_match.group(1).strip()
|
| 123 |
-
#
|
| 124 |
|
| 125 |
if sentiment not in ["Positive", "Negative", "Neutral"]:
|
| 126 |
sentiment = "Neutral"
|
|
|
|
| 67 |
|
| 68 |
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
|
| 69 |
inputs = inputs.to(self.device)
|
|
|
|
|
|
|
| 70 |
prompt_length = inputs['input_ids'].shape[1]
|
| 71 |
|
| 72 |
with torch.no_grad():
|
|
|
|
| 78 |
pad_token_id=self.tokenizer.eos_token_id
|
| 79 |
)
|
| 80 |
|
|
|
|
| 81 |
new_tokens = outputs[0][prompt_length:]
|
| 82 |
response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
|
| 83 |
|
|
|
|
| 88 |
return self._fallback_sentiment(text)
|
| 89 |
|
| 90 |
def _parse_llm_analysis(self, response):
|
| 91 |
+
"""
|
| 92 |
+
แยก sentiment, score, theme, impact และ explanation จาก LLM response
|
| 93 |
+
(เวอร์ชันแก้ไขให้ทนทานต่อ Markdown และข้อผิดพลาด)
|
| 94 |
+
"""
|
| 95 |
sentiment = "Neutral"
|
| 96 |
score = 0.5
|
| 97 |
theme = "Other"
|
| 98 |
impact = "Neutral"
|
| 99 |
+
explanation = "Unable to parse"
|
| 100 |
|
| 101 |
try:
|
| 102 |
+
# --- MODIFIED: Made Regex more robust ---
|
| 103 |
+
# (Handles optional markdown "**" and optional colon ":")
|
| 104 |
+
|
| 105 |
+
sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
|
| 106 |
if sentiment_line:
|
| 107 |
sentiment = sentiment_line.group(1).capitalize()
|
| 108 |
|
| 109 |
+
score_line = re.search(r'\**Score:?\**\s*([\d.]+)', response)
|
| 110 |
if score_line:
|
| 111 |
score = float(score_line.group(1))
|
| 112 |
score = max(0.0, min(1.0, score))
|
| 113 |
|
| 114 |
+
theme_line = re.search(r'\**Theme:?\**\s*([\w\/ -]+)', response, re.IGNORECASE)
|
| 115 |
if theme_line:
|
| 116 |
theme = theme_line.group(1).strip()
|
| 117 |
|
| 118 |
+
impact_line = re.search(r'\**Impact:?\**\s*(\w+)', response, re.IGNORECASE)
|
| 119 |
if impact_line:
|
| 120 |
impact = impact_line.group(1).capitalize().strip()
|
| 121 |
|
| 122 |
+
reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
|
|
|
|
| 123 |
if reason_match:
|
| 124 |
explanation = reason_match.group(1).strip()
|
| 125 |
+
# --- End of MODIFIED block ---
|
| 126 |
|
| 127 |
if sentiment not in ["Positive", "Negative", "Neutral"]:
|
| 128 |
sentiment = "Neutral"
|