Sooteemon committed on
Commit
0d6448a
·
verified ·
1 Parent(s): 99fb303

Update sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. sentiment_analyzer.py +14 -12
sentiment_analyzer.py CHANGED
@@ -67,8 +67,6 @@ Reason: [Brief explanation of your analysis]"""
67
 
68
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
69
  inputs = inputs.to(self.device)
70
-
71
- # --- MODIFIED: Get prompt length to slice output correctly ---
72
  prompt_length = inputs['input_ids'].shape[1]
73
 
74
  with torch.no_grad():
@@ -80,7 +78,6 @@ Reason: [Brief explanation of your analysis]"""
80
  pad_token_id=self.tokenizer.eos_token_id
81
  )
82
 
83
- # --- MODIFIED: Decode *only* the new tokens, not the prompt ---
84
  new_tokens = outputs[0][prompt_length:]
85
  response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
86
 
@@ -91,36 +88,41 @@ Reason: [Brief explanation of your analysis]"""
91
  return self._fallback_sentiment(text)
92
 
93
  def _parse_llm_analysis(self, response):
94
- """แยก sentiment, score, theme, impact และ explanation จาก LLM response"""
 
 
 
95
  sentiment = "Neutral"
96
  score = 0.5
97
  theme = "Other"
98
  impact = "Neutral"
99
- explanation = "Unable to parse" # Default explanation if parse fails
100
 
101
  try:
102
- sentiment_line = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
 
 
 
103
  if sentiment_line:
104
  sentiment = sentiment_line.group(1).capitalize()
105
 
106
- score_line = re.search(r'Score:\s*([\d.]+)', response)
107
  if score_line:
108
  score = float(score_line.group(1))
109
  score = max(0.0, min(1.0, score))
110
 
111
- theme_line = re.search(r'Theme:\s*([\w\/ -]+)', response, re.IGNORECASE)
112
  if theme_line:
113
  theme = theme_line.group(1).strip()
114
 
115
- impact_line = re.search(r'Impact:\s*(\w+)', response, re.IGNORECASE)
116
  if impact_line:
117
  impact = impact_line.group(1).capitalize().strip()
118
 
119
- # --- MODIFIED: More robust regex for Reason (captures multi-line) ---
120
- reason_match = re.search(r'Reason:\s*(.*)', response, re.DOTALL | re.IGNORECASE)
121
  if reason_match:
122
  explanation = reason_match.group(1).strip()
123
- # If parsing fails, explanation will remain "Unable to parse" or the last good value
124
 
125
  if sentiment not in ["Positive", "Negative", "Neutral"]:
126
  sentiment = "Neutral"
 
67
 
68
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
69
  inputs = inputs.to(self.device)
 
 
70
  prompt_length = inputs['input_ids'].shape[1]
71
 
72
  with torch.no_grad():
 
78
  pad_token_id=self.tokenizer.eos_token_id
79
  )
80
 
 
81
  new_tokens = outputs[0][prompt_length:]
82
  response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
83
 
 
88
  return self._fallback_sentiment(text)
89
 
90
  def _parse_llm_analysis(self, response):
91
+ """
92
+ แยก sentiment, score, theme, impact และ explanation จาก LLM response
93
+ (เวอร์ชันแก้ไขให้ทนทานต่อ Markdown และข้อผิดพลาด)
94
+ """
95
  sentiment = "Neutral"
96
  score = 0.5
97
  theme = "Other"
98
  impact = "Neutral"
99
+ explanation = "Unable to parse"
100
 
101
  try:
102
+ # --- MODIFIED: Made Regex more robust ---
103
+ # (Handles optional markdown "**" and optional colon ":")
104
+
105
+ sentiment_line = re.search(r'\**Sentiment:?\**\s*(\w+)', response, re.IGNORECASE)
106
  if sentiment_line:
107
  sentiment = sentiment_line.group(1).capitalize()
108
 
109
+ score_line = re.search(r'\**Score:?\**\s*([\d.]+)', response)
110
  if score_line:
111
  score = float(score_line.group(1))
112
  score = max(0.0, min(1.0, score))
113
 
114
+ theme_line = re.search(r'\**Theme:?\**\s*([\w\/ -]+)', response, re.IGNORECASE)
115
  if theme_line:
116
  theme = theme_line.group(1).strip()
117
 
118
+ impact_line = re.search(r'\**Impact:?\**\s*(\w+)', response, re.IGNORECASE)
119
  if impact_line:
120
  impact = impact_line.group(1).capitalize().strip()
121
 
122
+ reason_match = re.search(r'\**Reason:?\**\s*(.*)', response, re.DOTALL | re.IGNORECASE)
 
123
  if reason_match:
124
  explanation = reason_match.group(1).strip()
125
+ # --- End of MODIFIED block ---
126
 
127
  if sentiment not in ["Positive", "Negative", "Neutral"]:
128
  sentiment = "Neutral"