Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,7 +43,7 @@ if tokenizer.pad_token is None:
|
|
| 43 |
|
| 44 |
|
| 45 |
def clean_output(text):
|
| 46 |
-
"""Remove reasoning artifacts and
|
| 47 |
|
| 48 |
# Remove common reasoning patterns
|
| 49 |
patterns_to_remove = [
|
|
@@ -74,9 +74,64 @@ def clean_output(text):
|
|
| 74 |
text = text[idx:]
|
| 75 |
break
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# Clean up extra whitespace and duplicate asterisks
|
| 78 |
text = re.sub(r'\*{3,}', '**', text)
|
| 79 |
text = re.sub(r'\n{3,}', '\n\n', text)
|
|
|
|
| 80 |
text = text.strip()
|
| 81 |
|
| 82 |
return text
|
|
@@ -199,26 +254,29 @@ START REPORT NOW (no reasoning or commentary):
|
|
| 199 |
# Clean up the output
|
| 200 |
output_text = clean_output(output_text)
|
| 201 |
|
| 202 |
-
#
|
| 203 |
-
required_sections =
|
| 204 |
-
"Executive Summary",
|
| 205 |
-
"System-Specific Analysis",
|
| 206 |
-
"Personalized Action Plan",
|
| 207 |
-
"Interaction Alerts",
|
| 208 |
-
"Tabular Mapping",
|
| 209 |
-
"Enhanced AI Insights"
|
| 210 |
-
|
| 211 |
|
| 212 |
-
missing_sections = [
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
-
if
|
| 215 |
-
|
| 216 |
-
warning
|
| 217 |
-
warning += f"Missing
|
| 218 |
-
warning += "**
|
| 219 |
-
warning += "
|
| 220 |
-
warning += "
|
| 221 |
-
warning += "
|
| 222 |
output_text += warning
|
| 223 |
|
| 224 |
return output_text
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
def clean_output(text):
|
| 46 |
+
"""Remove reasoning artifacts and clean formatting"""
|
| 47 |
|
| 48 |
# Remove common reasoning patterns
|
| 49 |
patterns_to_remove = [
|
|
|
|
| 74 |
text = text[idx:]
|
| 75 |
break
|
| 76 |
|
| 77 |
+
# Fix common formatting issues from the model
|
| 78 |
+
replacements = {
|
| 79 |
+
# Fix typos
|
| 80 |
+
'Albumen': 'Albumin',
|
| 81 |
+
'Creatinin': 'Creatinine',
|
| 82 |
+
'Nomal': 'Normal',
|
| 83 |
+
'Lympho': 'Lymphocytes',
|
| 84 |
+
'Strea‑ngths': 'Strengths',
|
| 85 |
+
'Priorities': 'Priorities',
|
| 86 |
+
'Heath': 'Health',
|
| 87 |
+
'Kidnee': 'Kidney',
|
| 88 |
+
'Meta‑bolic': 'Metabolic',
|
| 89 |
+
'Health*': 'Health**',
|
| 90 |
+
'Personelized': 'Personalized',
|
| 91 |
+
'Action': 'Action',
|
| 92 |
+
'Plan': 'Plan',
|
| 93 |
+
'Interaction': 'Interaction',
|
| 94 |
+
'Tabular': 'Tabular',
|
| 95 |
+
'Mapping': 'Mapping',
|
| 96 |
+
'Biomarker': 'Biomarker',
|
| 97 |
+
'Value': 'Value',
|
| 98 |
+
'Status': 'Status',
|
| 99 |
+
'Clinical': 'Clinical',
|
| 100 |
+
'Insight': 'Insight',
|
| 101 |
+
'Recommentation': 'Recommendation',
|
| 102 |
+
'Longe‑timal': 'Longitudinal',
|
| 103 |
+
'Longe-Term': 'Long-Term',
|
| 104 |
+
'Insigh ts': 'Insights',
|
| 105 |
+
'A.I.': 'AI',
|
| 106 |
+
'Immune': 'Immune',
|
| 107 |
+
|
| 108 |
+
# Fix units
|
| 109 |
+
'gdL': 'g/dL',
|
| 110 |
+
'mgl/dL': 'mg/dL',
|
| 111 |
+
'mg/mL': 'mg/L',
|
| 112 |
+
'ui/l': 'U/L',
|
| 113 |
+
'kc/ml': 'K/uL',
|
| 114 |
+
'fl': 'fL',
|
| 115 |
+
|
| 116 |
+
# Clean up weird unicode characters
|
| 117 |
+
'': '',
|
| 118 |
+
'': '',
|
| 119 |
+
'‑': '-',
|
| 120 |
+
'–': '-',
|
| 121 |
+
'—': '-',
|
| 122 |
+
'│': '|',
|
| 123 |
+
'├─': '|',
|
| 124 |
+
'•': '-',
|
| 125 |
+
'‐': '-',
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
for old, new in replacements.items():
|
| 129 |
+
text = text.replace(old, new)
|
| 130 |
+
|
| 131 |
# Clean up extra whitespace and duplicate asterisks
|
| 132 |
text = re.sub(r'\*{3,}', '**', text)
|
| 133 |
text = re.sub(r'\n{3,}', '\n\n', text)
|
| 134 |
+
text = re.sub(r' {2,}', ' ', text)
|
| 135 |
text = text.strip()
|
| 136 |
|
| 137 |
return text
|
|
|
|
| 254 |
# Clean up the output
|
| 255 |
output_text = clean_output(output_text)
|
| 256 |
|
| 257 |
+
# Improved validation with flexible matching
|
| 258 |
+
required_sections = {
|
| 259 |
+
"Executive Summary": ["Executive Summary", "Executive Sum", "Executive Sum"],
|
| 260 |
+
"System-Specific Analysis": ["System-Specific Analysis", "System‑Specific", "Sys‐tem‑Specific"],
|
| 261 |
+
"Personalized Action Plan": ["Personalized Action Plan", "Personelized Action", "Action Plan"],
|
| 262 |
+
"Interaction Alerts": ["Interaction Alerts", "Interaction Alerts"],
|
| 263 |
+
"Tabular Mapping": ["Tabular Mapping", "Tabular Mapping", "| Bioma"],
|
| 264 |
+
"Enhanced AI Insights": ["Enhanced AI Insights", "Enhanced A.I. Insigh", "Longe‑ti"]
|
| 265 |
+
}
|
| 266 |
|
| 267 |
+
missing_sections = []
|
| 268 |
+
for section, variants in required_sections.items():
|
| 269 |
+
if not any(variant in output_text for variant in variants):
|
| 270 |
+
missing_sections.append(section)
|
| 271 |
|
| 272 |
+
# Only warn if truly incomplete (very short OR missing most sections)
|
| 273 |
+
if len(output_text) < 500 or len(missing_sections) >= 4:
|
| 274 |
+
warning = "\n\n⚠️ **Warning: Incomplete Output**\n\n"
|
| 275 |
+
warning += f"Generated: {len(output_text)} characters | Missing: {', '.join(missing_sections) if missing_sections else 'None'}\n\n"
|
| 276 |
+
warning += "**Suggestions:**\n"
|
| 277 |
+
warning += "- Try regenerating with different parameter values\n"
|
| 278 |
+
warning += "- Consider using a larger model (Llama-3-8B, Mistral-7B)\n"
|
| 279 |
+
warning += "- Or use API-based models for guaranteed quality\n"
|
| 280 |
output_text += warning
|
| 281 |
|
| 282 |
return output_text
|