Muhammadidrees committed on
Commit
0071032
·
verified ·
1 Parent(s): f176b5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -19
app.py CHANGED
@@ -43,7 +43,7 @@ if tokenizer.pad_token is None:
43
 
44
 
45
  def clean_output(text):
46
- """Remove reasoning artifacts and extract only the actual report"""
47
 
48
  # Remove common reasoning patterns
49
  patterns_to_remove = [
@@ -74,9 +74,64 @@ def clean_output(text):
74
  text = text[idx:]
75
  break
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Clean up extra whitespace and duplicate asterisks
78
  text = re.sub(r'\*{3,}', '**', text)
79
  text = re.sub(r'\n{3,}', '\n\n', text)
 
80
  text = text.strip()
81
 
82
  return text
@@ -199,26 +254,29 @@ START REPORT NOW (no reasoning or commentary):
199
  # Clean up the output
200
  output_text = clean_output(output_text)
201
 
202
- # Validation
203
- required_sections = [
204
- "Executive Summary",
205
- "System-Specific Analysis",
206
- "Personalized Action Plan",
207
- "Interaction Alerts",
208
- "Tabular Mapping",
209
- "Enhanced AI Insights"
210
- ]
211
 
212
- missing_sections = [s for s in required_sections if s not in output_text]
 
 
 
213
 
214
- if len(output_text) < 800 or len(missing_sections) >= 3:
215
- warning = "\n\n⚠️ **Model Performance Issue Detected**\n\n"
216
- warning += f"Generated text length: {len(output_text)} characters\n"
217
- warning += f"Missing sections: {', '.join(missing_sections) if missing_sections else 'None'}\n\n"
218
- warning += "**This model may not be suitable for this task. Consider:**\n"
219
- warning += "1. Using Llama-3-8B-Instruct or Mistral-7B-Instruct\n"
220
- warning += "2. Fine-tuning on medical report generation\n"
221
- warning += "3. Using API-based models (GPT-4, Claude, etc.)\n"
222
  output_text += warning
223
 
224
  return output_text
 
43
 
44
 
45
  def clean_output(text):
46
+ """Remove reasoning artifacts and clean formatting"""
47
 
48
  # Remove common reasoning patterns
49
  patterns_to_remove = [
 
74
  text = text[idx:]
75
  break
76
 
77
+ # Fix common formatting issues from the model
78
+ replacements = {
79
+ # Fix typos
80
+ 'Albumen': 'Albumin',
81
+ 'Creatinin': 'Creatinine',
82
+ 'Nomal': 'Normal',
83
+ 'Lympho': 'Lymphocytes',
84
+ 'Strea‑ngths': 'Strengths',
85
+ 'Priori­ties': 'Priorities',
86
+ 'Hea​th': 'Health',
87
+ 'Kid­nee': 'Kidney',
88
+ 'Meta‑bolic': 'Metabolic',
89
+ 'Heal­th*': 'Health**',
90
+ 'Per­sone­lized': 'Personalized',
91
+ 'Ac­tion': 'Action',
92
+ 'Pla­n': 'Plan',
93
+ 'Interac­tion': 'Interaction',
94
+ 'Ta­bular': 'Tabular',
95
+ 'Ma­pping': 'Mapping',
96
+ 'Bioma­rker': 'Biomarker',
97
+ 'Val­ue': 'Value',
98
+ 'Sta­tus': 'Status',
99
+ 'Clin­i­cal': 'Clinical',
100
+ 'In­si­ght': 'Insight',
101
+ 'Recom­men­ta­tion': 'Recommendation',
102
+ 'Lon­ge‑ti­mal': 'Longitudinal',
103
+ 'Lon­ge-Term': 'Long-Term',
104
+ 'Insigh ts': 'Insights',
105
+ 'A.I.': 'AI',
106
+ 'Immu­ne': 'Immune',
107
+
108
+ # Fix units
109
+ 'gdL': 'g/dL',
110
+ 'mgl/dL': 'mg/dL',
111
+ 'mg/mL': 'mg/L',
112
+ 'ui/l': 'U/L',
113
+ 'kc/ml': 'K/uL',
114
+ 'fl': 'fL',
115
+
116
+ # Clean up weird unicode characters
117
+ '­': '',
118
+ '​': '',
119
+ '‑': '-',
120
+ '–': '-',
121
+ '—': '-',
122
+ '│': '|',
123
+ '├─': '|',
124
+ '•': '-',
125
+ '‐': '-',
126
+ }
127
+
128
+ for old, new in replacements.items():
129
+ text = text.replace(old, new)
130
+
131
  # Clean up extra whitespace and duplicate asterisks
132
  text = re.sub(r'\*{3,}', '**', text)
133
  text = re.sub(r'\n{3,}', '\n\n', text)
134
+ text = re.sub(r' {2,}', ' ', text)
135
  text = text.strip()
136
 
137
  return text
 
254
  # Clean up the output
255
  output_text = clean_output(output_text)
256
 
257
+ # Improved validation with flexible matching
258
+ required_sections = {
259
+ "Executive Summary": ["Executive Summary", "Execut­ive Sum", "Executive Sum"],
260
+ "System-Specific Analysis": ["System-Specific Analysis", "System‑Specific", "Sys‐tem‑Specific"],
261
+ "Personalized Action Plan": ["Personalized Action Plan", "Per­sone­lized Ac­tion", "Action Plan"],
262
+ "Interaction Alerts": ["Interaction Alerts", "Interac­tion Alerts"],
263
+ "Tabular Mapping": ["Tabular Mapping", "Ta­bular Ma­pping", "| Bioma"],
264
+ "Enhanced AI Insights": ["Enhanced AI Insights", "Enhanced A.I. Insigh", "Lon­ge‑ti"]
265
+ }
266
 
267
+ missing_sections = []
268
+ for section, variants in required_sections.items():
269
+ if not any(variant in output_text for variant in variants):
270
+ missing_sections.append(section)
271
 
272
+ # Only warn if truly incomplete (very short OR missing most sections)
273
+ if len(output_text) < 500 or len(missing_sections) >= 4:
274
+ warning = "\n\n⚠️ **Warning: Incomplete Output**\n\n"
275
+ warning += f"Generated: {len(output_text)} characters | Missing: {', '.join(missing_sections) if missing_sections else 'None'}\n\n"
276
+ warning += "**Suggestions:**\n"
277
+ warning += "- Try regenerating with different parameter values\n"
278
+ warning += "- Consider using a larger model (Llama-3-8B, Mistral-7B)\n"
279
+ warning += "- Or use API-based models for guaranteed quality\n"
280
  output_text += warning
281
 
282
  return output_text