Files changed (1) hide show
  1. app.py +254 -234
app.py CHANGED
@@ -1,234 +1,254 @@
1
- """
2
- Lab Report Decoder - Flask Application
3
- Professional web interface for lab report analysis
4
- """
5
-
6
- from flask import Flask, render_template, request, jsonify, session
7
- from werkzeug.utils import secure_filename
8
- import os
9
- import tempfile
10
- import secrets
11
- from pdf_extractor import LabReportExtractor
12
- from rag_engine import LabReportRAG
13
- from dotenv import load_dotenv
14
-
15
# Read environment variables from a .env file before anything consults them.
load_dotenv()

app = Flask(__name__)

# Prefer the configured SECRET_KEY; otherwise use a random per-process key.
app.secret_key = os.getenv('SECRET_KEY', secrets.token_hex(16))

# Note: No OpenAI API key needed - using Hugging Face models!
app.config.update(
    MAX_CONTENT_LENGTH=16 * 1024 * 1024,  # 16MB max file size
    UPLOAD_FOLDER=tempfile.gettempdir(),
)

# Shared RAG instance; stays None until get_rag_system() first builds it.
rag_system = None
26
-
27
def get_rag_system():
    """Return the shared LabReportRAG instance, creating it on first use."""
    global rag_system
    if rag_system is not None:
        return rag_system
    rag_system = LabReportRAG()
    return rag_system
33
-
34
@app.route('/')
def index():
    """Serve the application's landing page (the index.html template)."""
    page = render_template('index.html')
    return page
38
-
39
@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Handle PDF upload and extraction.

    Expects a multipart request with a ``file`` part whose filename ends in
    ``.pdf``. The upload is written to a uniquely named temporary file, parsed
    with ``LabReportExtractor``, and the temporary file is always removed.

    Returns:
        On success, JSON with ``success``, ``results`` and ``count``; the
        results are also stored in the Flask session under ``'results'``.
        Validation problems and PDFs without results give a 400 JSON error;
        any other exception gives a 500 JSON error.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']

        if upload.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not upload.filename.lower().endswith('.pdf'):
            return jsonify({'error': 'Only PDF files are allowed'}), 400

        # Use a server-chosen unique path instead of the client's filename:
        # two uploads with the same name would otherwise overwrite (and then
        # delete) each other's file, and a sanitised name can come out empty.
        fd, filepath = tempfile.mkstemp(suffix='.pdf', dir=app.config['UPLOAD_FOLDER'])
        os.close(fd)

        try:
            upload.save(filepath)

            # Extract lab results
            extractor = LabReportExtractor()
            results = extractor.extract_from_pdf(filepath)

            if not results:
                return jsonify({'error': 'No lab results found in PDF'}), 400

            # Convert to JSON-serializable format
            results_data = [
                {
                    'test_name': r.test_name,
                    'value': r.value,
                    'unit': r.unit,
                    'reference_range': r.reference_range,
                    'status': r.status,
                }
                for r in results
            ]

            # Store in session
            session['results'] = results_data

            return jsonify({
                'success': True,
                'results': results_data,
                'count': len(results_data),
            })

        finally:
            # Clean up temp file, including when saving or parsing failed
            if os.path.exists(filepath):
                os.remove(filepath)

    except Exception as e:
        return jsonify({'error': str(e)}), 500
95
-
96
@app.route('/api/explain', methods=['POST'])
def explain_results():
    """Explain every lab result saved in the caller's session.

    Returns JSON with ``success`` and ``explanations``; a 400 JSON error when
    the session holds no results, and a 500 JSON error on any exception.
    """
    try:
        stored = session.get('results')
        if not stored:
            return jsonify({'error': 'No results found. Please upload a PDF first.'}), 400

        # Rebuild LabResult objects from the plain dicts kept in the session.
        from pdf_extractor import LabResult
        field_names = ('test_name', 'value', 'unit', 'reference_range', 'status')
        lab_results = [
            LabResult(**{name: row[name] for name in field_names})
            for row in stored
        ]

        engine = get_rag_system()
        payload = {
            'success': True,
            'explanations': engine.explain_all_results(lab_results),
        }
        return jsonify(payload)

    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
129
-
130
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Answer a follow-up question about the lab results in the session.

    Reads ``question`` from the JSON request body and the previously uploaded
    results from the Flask session.

    Returns:
        JSON with ``success``, ``question`` and ``answer``. A missing, blank or
        non-string question, or a session without results, gives a 400 JSON
        error; any other exception gives a 500 JSON error.
    """
    try:
        # silent=True returns None for a missing or non-JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        question = data.get('question', '') if isinstance(data, dict) else ''

        if not isinstance(question, str) or not question.strip():
            return jsonify({'error': 'No question provided'}), 400
        question = question.strip()

        results_data = session.get('results')

        if not results_data:
            return jsonify({'error': 'No results found. Please upload a PDF first.'}), 400

        # Convert back to LabResult objects
        from pdf_extractor import LabResult
        results = [
            LabResult(
                test_name=r['test_name'],
                value=r['value'],
                unit=r['unit'],
                reference_range=r['reference_range'],
                status=r['status'],
            )
            for r in results_data
        ]

        # Get answer
        rag = get_rag_system()
        answer = rag.answer_followup_question(question, results)

        return jsonify({
            'success': True,
            'question': question,
            'answer': answer,
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
170
-
171
@app.route('/api/summary', methods=['GET'])
def get_summary():
    """Summarise the lab results saved in the caller's session.

    Returns JSON with ``success``, ``summary`` and per-status ``stats``; a 400
    JSON error when the session holds no results, and a 500 JSON error on any
    exception.
    """
    try:
        stored = session.get('results')
        if not stored:
            return jsonify({'error': 'No results found. Please upload a PDF first.'}), 400

        # Rebuild LabResult objects from the plain dicts kept in the session.
        from pdf_extractor import LabResult
        field_names = ('test_name', 'value', 'unit', 'reference_range', 'status')
        lab_results = [
            LabResult(**{name: row[name] for name in field_names})
            for row in stored
        ]

        summary_text = get_rag_system().generate_summary(lab_results)

        # Count how many results carry each recognised status label.
        statuses = [item.status for item in lab_results]
        stats = {'total': len(lab_results)}
        for label in ('normal', 'high', 'low', 'unknown'):
            stats[label] = statuses.count(label)

        return jsonify({
            'success': True,
            'summary': summary_text,
            'stats': stats,
        })

    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
214
-
215
@app.route('/api/clear', methods=['POST'])
def clear_session():
    """Empty the caller's session and confirm with a success payload."""
    session.clear()
    payload = {'success': True}
    return jsonify(payload)
220
-
221
@app.errorhandler(413)
def request_entity_too_large(error):
    """Answer HTTP 413 errors with a JSON message naming the 16MB limit."""
    body = jsonify({'error': 'File too large. Maximum size is 16MB.'})
    return body, 413
224
-
225
@app.errorhandler(500)
def internal_error(error):
    """Answer HTTP 500 errors with a generic JSON error body."""
    body = jsonify({'error': 'Internal server error'})
    return body, 500
228
-
229
if __name__ == '__main__':
    # Build the vector database on first launch, when its directory is absent.
    if not os.path.isdir('chroma_db/'):
        import subprocess
        import sys

        # Run the builder with the interpreter that is running this app and
        # without a shell; "python" on PATH may be a different installation.
        build = subprocess.run([sys.executable, 'build_vector_db.py'])
        if build.returncode != 0:
            # Still start the server (as before), but do not hide the failure.
            print(
                f"build_vector_db.py exited with status {build.returncode}",
                file=sys.stderr,
            )

    # Use the port given by the environment, falling back to 5000.
    port = int(os.environ.get("PORT", 5000))
    app.run(host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Lab Report Decoder - Flask Application
3
+ Professional web interface for lab report analysis
4
+ Fixed for Hugging Face Spaces stateless environment
5
+ """
6
+
7
+ from flask import Flask, render_template, request, jsonify
8
+ from werkzeug.utils import secure_filename
9
+ import os
10
+ import tempfile
11
+ import secrets
12
+ import json
13
+ import uuid
14
+ from pdf_extractor import LabReportExtractor, LabResult
15
+ from rag_engine import LabReportRAG
16
+ from dotenv import load_dotenv
17
+
18
# Read environment variables from a .env file before anything consults them.
load_dotenv()

app = Flask(__name__)

# Prefer the configured SECRET_KEY; otherwise use a random per-process key.
app.secret_key = os.getenv('SECRET_KEY', secrets.token_hex(16))

app.config.update(
    MAX_CONTENT_LENGTH=16 * 1024 * 1024,  # 16MB max file size
    UPLOAD_FOLDER=tempfile.gettempdir(),
)

# Shared RAG instance; stays None until get_rag_system() first builds it.
rag_system = None

# In-memory storage for sessions (better than Flask sessions in HF Spaces)
session_storage = dict()
30
+
31
def get_rag_system():
    """Return the shared LabReportRAG instance, creating it on first use.

    The first call constructs the instance and logs progress to stdout; later
    calls return the cached object.
    """
    global rag_system
    if rag_system is None:
        print("🔄 Initializing RAG system...")
        rag_system = LabReportRAG()
        print("✅ RAG system ready")
    return rag_system
39
+
40
@app.route('/')
def index():
    """Render and return the index.html template for the root URL."""
    rendered = render_template('index.html')
    return rendered
44
+
45
@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Handle PDF upload and extraction.

    Expects a multipart request with a ``file`` part whose filename ends in
    ``.pdf``. The upload is written to a uniquely named temporary file, parsed
    with ``LabReportExtractor``, and the temporary file is always removed.

    Returns:
        On success, JSON with ``success``, ``session_id``, ``results`` and
        ``count``; the results are kept in ``session_storage`` under the new
        session ID. Validation problems and PDFs without results give a 400
        JSON error; any other exception gives a 500 JSON error.
    """
    # Upper bound on uploads kept in memory at once.
    max_sessions = 100

    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']

        if upload.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not upload.filename.lower().endswith('.pdf'):
            return jsonify({'error': 'Only PDF files are allowed'}), 400

        # Use a server-chosen unique path instead of the client's filename:
        # two uploads with the same name would otherwise overwrite (and then
        # delete) each other's file, and a sanitised name can come out empty.
        fd, filepath = tempfile.mkstemp(suffix='.pdf', dir=app.config['UPLOAD_FOLDER'])
        os.close(fd)

        try:
            upload.save(filepath)

            # Extract lab results
            print("📄 Extracting lab results from PDF...")
            extractor = LabReportExtractor()
            results = extractor.extract_from_pdf(filepath)

            if not results:
                return jsonify({'error': 'No lab results found in PDF. Please make sure your PDF contains a valid lab report with test names, values, and reference ranges.'}), 400

            print(f"✅ Extracted {len(results)} results")

            # Convert to JSON-serializable format
            results_data = [
                {
                    'test_name': r.test_name,
                    'value': r.value,
                    'unit': r.unit,
                    'reference_range': r.reference_range,
                    'status': r.status,
                }
                for r in results
            ]

            # Generate a unique session ID
            session_id = str(uuid.uuid4())

            # Nothing else removes entries, so without a cap every upload
            # would stay in memory forever. Dicts keep insertion order, so
            # the first key is the oldest upload.
            while len(session_storage) >= max_sessions:
                session_storage.pop(next(iter(session_storage)), None)

            # Store results in memory with session ID
            session_storage[session_id] = {
                'results': results_data,
                'results_objects': results,  # Store LabResult objects for later use
            }

            print(f"💾 Stored results in session: {session_id}")

            return jsonify({
                'success': True,
                'session_id': session_id,
                'results': results_data,
                'count': len(results_data),
            })

        finally:
            # Clean up temp file, including when saving or parsing failed
            if os.path.exists(filepath):
                os.remove(filepath)

    except Exception as e:
        print(f"❌ Upload error: {str(e)}")
        return jsonify({'error': str(e)}), 500
114
+
115
@app.route('/api/explain', methods=['POST'])
def explain_results():
    """Generate explanations for lab results.

    Reads ``session_id`` from the JSON request body and explains every result
    stored for that session, one ``rag.explain_result`` call per result. The
    explanations are also saved on the session entry.

    Returns:
        JSON with ``success`` and ``explanations`` (a mapping keyed by test
        name; a result whose explanation fails gets an error string instead).
        A missing, malformed or unknown session ID gives a 400 JSON error; any
        other exception gives a 500 JSON error.
    """
    try:
        # silent=True returns None for a missing or non-JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        session_id = data.get('session_id') if isinstance(data, dict) else None

        # Require a string so an unhashable JSON value cannot break the lookup.
        if not isinstance(session_id, str) or session_id not in session_storage:
            return jsonify({'error': 'Session expired. Please upload your PDF again.'}), 400

        # Get results from storage
        session_data = session_storage[session_id]
        results = session_data['results_objects']

        print(f"🧠 Generating explanations for {len(results)} results...")

        # Generate explanations
        rag = get_rag_system()
        explanations = {}

        for i, result in enumerate(results):
            print(f" Explaining {i+1}/{len(results)}: {result.test_name}...")
            try:
                explanations[result.test_name] = rag.explain_result(result)
            except Exception as e:
                # One failing test must not discard the other explanations.
                print(f" Error: {str(e)}")
                explanations[result.test_name] = f"Unable to generate explanation: {str(e)}"

        # Store explanations in session
        session_data['explanations'] = explanations

        print("✅ All explanations generated")

        return jsonify({
            'success': True,
            'explanations': explanations,
        })

    except Exception as e:
        print(f"❌ Explanation error: {str(e)}")
        return jsonify({'error': str(e)}), 500
157
+
158
@app.route('/api/ask', methods=['POST'])
def ask_question():
    """Answer a follow-up question about a stored set of lab results.

    Reads ``question`` and ``session_id`` from the JSON request body and
    passes the question, with that session's results, to the RAG system.

    Returns:
        JSON with ``success``, ``question`` and ``answer``. A missing, blank or
        non-string question, or a missing, malformed or unknown session ID,
        gives a 400 JSON error; any other exception gives a 500 JSON error.
    """
    try:
        # silent=True returns None for a missing or non-JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        question = data.get('question', '')
        session_id = data.get('session_id')

        if not isinstance(question, str) or not question.strip():
            return jsonify({'error': 'No question provided'}), 400
        question = question.strip()

        # Require a string so an unhashable JSON value cannot break the lookup.
        if not isinstance(session_id, str) or session_id not in session_storage:
            return jsonify({'error': 'Session expired. Please upload your PDF again.'}), 400

        # Get results from storage
        session_data = session_storage[session_id]
        results = session_data['results_objects']

        print(f"💬 Answering question: {question}")

        # Get answer
        rag = get_rag_system()
        answer = rag.answer_followup_question(question, results)

        print("✅ Answer generated")

        return jsonify({
            'success': True,
            'question': question,
            'answer': answer,
        })

    except Exception as e:
        print(f"❌ Question error: {str(e)}")
        return jsonify({'error': str(e)}), 500
193
+
194
@app.route('/api/summary', methods=['POST'])
def get_summary():
    """Generate an overall summary for a stored set of lab results.

    Reads ``session_id`` from the JSON request body, asks the RAG system for a
    summary of that session's results, and counts results per status.

    Returns:
        JSON with ``success``, ``summary`` and ``stats`` (``total`` plus counts
        for ``normal``, ``high``, ``low`` and ``unknown``). A missing,
        malformed or unknown session ID gives a 400 JSON error; any other
        exception gives a 500 JSON error.
    """
    try:
        # silent=True returns None for a missing or non-JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        session_id = data.get('session_id') if isinstance(data, dict) else None

        # Require a string so an unhashable JSON value cannot break the lookup.
        if not isinstance(session_id, str) or session_id not in session_storage:
            return jsonify({'error': 'Session expired. Please upload your PDF again.'}), 400

        # Get results from storage
        session_data = session_storage[session_id]
        results = session_data['results_objects']

        print("📊 Generating summary...")

        # Generate summary
        rag = get_rag_system()
        summary = rag.generate_summary(results)

        # Calculate statistics
        stats = {
            'total': len(results),
            'normal': sum(1 for r in results if r.status == 'normal'),
            'high': sum(1 for r in results if r.status == 'high'),
            'low': sum(1 for r in results if r.status == 'low'),
            'unknown': sum(1 for r in results if r.status == 'unknown'),
        }

        print(f"✅ Summary generated - Stats: {stats}")

        return jsonify({
            'success': True,
            'summary': summary,
            'stats': stats,
        })

    except Exception as e:
        print(f"❌ Summary error: {str(e)}")
        return jsonify({'error': str(e)}), 500
234
+
235
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report a healthy status plus the number of sessions held in memory."""
    session_count = len(session_storage)
    report = {'status': 'healthy', 'active_sessions': session_count}
    return jsonify(report)
242
+
243
@app.errorhandler(413)
def request_entity_too_large(error):
    """Turn an HTTP 413 into a JSON error that states the 16MB upload limit."""
    message = {'error': 'File too large. Maximum size is 16MB.'}
    return jsonify(message), 413
246
+
247
@app.errorhandler(500)
def internal_error(error):
    """Turn an HTTP 500 into a generic JSON error response."""
    message = {'error': 'Internal server error'}
    return jsonify(message), 500
250
+
251
if __name__ == '__main__':
    # Port comes from the environment when set; 7860 stays the default.
    port = int(os.environ.get('PORT', 7860))

    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution. The server binds to every interface, so debug
    # must be an explicit opt-in rather than always on.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')

    print("🚀 Starting Lab Report Decoder...")
    print(f"📍 Server will be available at http://0.0.0.0:{port}")
    app.run(debug=debug, host='0.0.0.0', port=port)