Muhammadidrees committed on
Commit
0071032
·
verified ·
1 Parent(s): f176b5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -19
app.py CHANGED
@@ -43,7 +43,7 @@ if tokenizer.pad_token is None:
43
 
44
 
45
  def clean_output(text):
46
- """Remove reasoning artifacts and extract only the actual report"""
47
 
48
  # Remove common reasoning patterns
49
  patterns_to_remove = [
@@ -74,9 +74,64 @@ def clean_output(text):
74
  text = text[idx:]
75
  break
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Clean up extra whitespace and duplicate asterisks
78
  text = re.sub(r'\*{3,}', '**', text)
79
  text = re.sub(r'\n{3,}', '\n\n', text)
 
80
  text = text.strip()
81
 
82
  return text
@@ -199,26 +254,29 @@ START REPORT NOW (no reasoning or commentary):
199
  # Clean up the output
200
  output_text = clean_output(output_text)
201
 
202
- # Validation
203
- required_sections = [
204
- "Executive Summary",
205
- "System-Specific Analysis",
206
- "Personalized Action Plan",
207
- "Interaction Alerts",
208
- "Tabular Mapping",
209
- "Enhanced AI Insights"
210
- ]
211
 
212
- missing_sections = [s for s in required_sections if s not in output_text]
 
 
 
213
 
214
- if len(output_text) < 800 or len(missing_sections) >= 3:
215
- warning = "\n\n⚠️ **Model Performance Issue Detected**\n\n"
216
- warning += f"Generated text length: {len(output_text)} characters\n"
217
- warning += f"Missing sections: {', '.join(missing_sections) if missing_sections else 'None'}\n\n"
218
- warning += "**This model may not be suitable for this task. Consider:**\n"
219
- warning += "1. Using Llama-3-8B-Instruct or Mistral-7B-Instruct\n"
220
- warning += "2. Fine-tuning on medical report generation\n"
221
- warning += "3. Using API-based models (GPT-4, Claude, etc.)\n"
222
  output_text += warning
223
 
224
  return output_text
 
43
 
44
 
45
  def clean_output(text):
46
+ """Remove reasoning artifacts and clean formatting"""
47
 
48
  # Remove common reasoning patterns
49
  patterns_to_remove = [
 
74
  text = text[idx:]
75
  break
76
 
77
+ # Fix common formatting issues from the model
78
+ replacements = {
79
+ # Fix typos
80
+ 'Albumen': 'Albumin',
81
+ 'Creatinin': 'Creatinine',
82
+ 'Nomal': 'Normal',
83
+ 'Lympho': 'Lymphocytes',
84
+ 'Strea‑ngths': 'Strengths',
85
+ 'Priori­ties': 'Priorities',
86
+ 'Hea​th': 'Health',
87
+ 'Kid­nee': 'Kidney',
88
+ 'Meta‑bolic': 'Metabolic',
89
+ 'Heal­th*': 'Health**',
90
+ 'Per­sone­lized': 'Personalized',
91
+ 'Ac­tion': 'Action',
92
+ 'Pla­n': 'Plan',
93
+ 'Interac­tion': 'Interaction',
94
+ 'Ta­bular': 'Tabular',
95
+ 'Ma­pping': 'Mapping',
96
+ 'Bioma­rker': 'Biomarker',
97
+ 'Val­ue': 'Value',
98
+ 'Sta­tus': 'Status',
99
+ 'Clin­i­cal': 'Clinical',
100
+ 'In­si­ght': 'Insight',
101
+ 'Recom­men­ta­tion': 'Recommendation',
102
+ 'Lon­ge‑ti­mal': 'Longitudinal',
103
+ 'Lon­ge-Term': 'Long-Term',
104
+ 'Insigh ts': 'Insights',
105
+ 'A.I.': 'AI',
106
+ 'Immu­ne': 'Immune',
107
+
108
+ # Fix units
109
+ 'gdL': 'g/dL',
110
+ 'mgl/dL': 'mg/dL',
111
+ 'mg/mL': 'mg/L',
112
+ 'ui/l': 'U/L',
113
+ 'kc/ml': 'K/uL',
114
+ 'fl': 'fL',
115
+
116
+ # Clean up weird unicode characters
117
+ '­': '',
118
+ '​': '',
119
+ '‑': '-',
120
+ '–': '-',
121
+ '—': '-',
122
+ '│': '|',
123
+ '├─': '|',
124
+ '•': '-',
125
+ '‐': '-',
126
+ }
127
+
128
+ for old, new in replacements.items():
129
+ text = text.replace(old, new)
130
+
131
  # Clean up extra whitespace and duplicate asterisks
132
  text = re.sub(r'\*{3,}', '**', text)
133
  text = re.sub(r'\n{3,}', '\n\n', text)
134
+ text = re.sub(r' {2,}', ' ', text)
135
  text = text.strip()
136
 
137
  return text
 
254
  # Clean up the output
255
  output_text = clean_output(output_text)
256
 
257
+ # Improved validation with flexible matching
258
+ required_sections = {
259
+ "Executive Summary": ["Executive Summary", "Execut­ive Sum", "Executive Sum"],
260
+ "System-Specific Analysis": ["System-Specific Analysis", "System‑Specific", "Sys‐tem‑Specific"],
261
+ "Personalized Action Plan": ["Personalized Action Plan", "Per­sone­lized Ac­tion", "Action Plan"],
262
+ "Interaction Alerts": ["Interaction Alerts", "Interac­tion Alerts"],
263
+ "Tabular Mapping": ["Tabular Mapping", "Ta­bular Ma­pping", "| Bioma"],
264
+ "Enhanced AI Insights": ["Enhanced AI Insights", "Enhanced A.I. Insigh", "Lon­ge‑ti"]
265
+ }
266
 
267
+ missing_sections = []
268
+ for section, variants in required_sections.items():
269
+ if not any(variant in output_text for variant in variants):
270
+ missing_sections.append(section)
271
 
272
+ # Only warn if truly incomplete (very short OR missing most sections)
273
+ if len(output_text) < 500 or len(missing_sections) >= 4:
274
+ warning = "\n\n⚠️ **Warning: Incomplete Output**\n\n"
275
+ warning += f"Generated: {len(output_text)} characters | Missing: {', '.join(missing_sections) if missing_sections else 'None'}\n\n"
276
+ warning += "**Suggestions:**\n"
277
+ warning += "- Try regenerating with different parameter values\n"
278
+ warning += "- Consider using a larger model (Llama-3-8B, Mistral-7B)\n"
279
+ warning += "- Or use API-based models for guaranteed quality\n"
280
  output_text += warning
281
 
282
  return output_text