Spaces:

chenemii
/

Par-ity_Project

Paused

App Files Files Community

chenemii committed on Aug 30, 2025

Commit

4773d5a

1 Parent(s): 7c0b2f1

Fix strengths parsing: ensure full sentences and limit to exactly three

Browse files

Files changed (1) hide show

app/models/llm_analyzer.py +28 -3

app/models/llm_analyzer.py CHANGED Viewed

@@ -654,9 +654,34 @@ def parse_and_format_analysis(raw_analysis):
     strengths_match = re.search(r'\*\*Strengths\*\*\s*(.*?)(?=\*\*Areas for Improvement\*\*|\*\*Practice Tips\*\*|$)', raw_analysis, re.IGNORECASE | re.DOTALL)
     if strengths_match:
         strengths_text = strengths_match.group(1)
-        # Extract numbered items (1. Topic: Description)
-        strength_items = re.findall(r'\d+\.\s*([^:]+):\s*([^\n\d]+)', strengths_text)
-        formatted_analysis['strengths'] = [f"{topic.strip()}: {desc.strip()}" for topic, desc in strength_items if topic.strip() and desc.strip()]
     # Extract areas for improvement using the new structured format
     weaknesses_match = re.search(r'\*\*Areas for Improvement\*\*\s*(.*?)(?=\*\*Practice Tips\*\*|$)', raw_analysis, re.IGNORECASE | re.DOTALL)

     strengths_match = re.search(r'\*\*Strengths\*\*\s*(.*?)(?=\*\*Areas for Improvement\*\*|\*\*Practice Tips\*\*|$)', raw_analysis, re.IGNORECASE | re.DOTALL)
     if strengths_match:
         strengths_text = strengths_match.group(1)
+        # Extract numbered items allowing digits/percentages in description until next numbered item
+        # Pattern: 1. Topic: Sentence.
+        strength_items = re.findall(r'\n?\s*\d+\.\s*([^:]+):\s*([\s\S]*?)(?=(?:\n\s*\d+\.|\n\s*\*\*|$))', strengths_text)
+        cleaned_strengths = []
+        for topic, desc in strength_items:
+            topic_clean = topic.strip()
+            # Take first full sentence from desc
+            desc_text = desc.strip().replace('\n', ' ')
+            # Keep only the text up to and including the first period
+            sentence_match = re.match(r'(.+?\.)', desc_text)
+            if sentence_match:
+                desc_clean = sentence_match.group(1).strip()
+            else:
+                # Fallback: up to 180 chars
+                desc_clean = desc_text[:180].strip()
+            if topic_clean and desc_clean:
+                cleaned_strengths.append(f"{topic_clean}: {desc_clean}")
+        # Keep at most three strengths
+        formatted_analysis['strengths'] = cleaned_strengths[:3]
+        # If fewer than three were parsed, fall back to any colon-style lines present
+        if len(formatted_analysis['strengths']) < 3:
+            fallback_items = re.findall(r'^[\-•]?\s*([^:\n]+):\s*(.+)$', strengths_text, re.MULTILINE)
+            for topic, desc in fallback_items:
+                item = f"{topic.strip()}: {desc.strip()}"
+                if item not in formatted_analysis['strengths']:
+                    formatted_analysis['strengths'].append(item)
+                if len(formatted_analysis['strengths']) >= 3:
+                    break
     # Extract areas for improvement using the new structured format
     weaknesses_match = re.search(r'\*\*Areas for Improvement\*\*\s*(.*?)(?=\*\*Practice Tips\*\*|$)', raw_analysis, re.IGNORECASE | re.DOTALL)