sikeaditya committed on
Commit
b8074ac
·
verified ·
1 Parent(s): 5544199

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import io
import logging
import os
import re

import markdown
import PyPDF2
import pytesseract  # OCR library for extracting text from images
from flask import Flask, request, render_template, redirect, flash, url_for
from google import genai
from PIL import Image
from werkzeug.utils import secure_filename
12
+
13
app = Flask(__name__)
# Flask signs the session cookie (which flash() relies on) with this key.
# Take the real secret from the environment; the literal placeholder is kept
# only as a development fallback so existing setups keep starting.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'your_secret_key_here')
logging.basicConfig(level=logging.DEBUG)

# Configure upload settings
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'pdf', 'jpg', 'jpeg', 'png'}
MAX_FILE_SIZE = 20 * 1024 * 1024  # 20MB max file size
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

# Ensure the upload folder exists. exist_ok avoids the check-then-create race
# when several workers import this module at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
27
+
28
def allowed_file(filename):
    """Tell whether *filename* carries one of the permitted upload extensions.

    The extension is whatever follows the last dot, compared
    case-insensitively against ALLOWED_EXTENSIONS. A name without any dot is
    rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
30
+
31
def extract_images_from_pdf(pdf_path):
    """Collect the images embedded in a PDF as base64-encoded PNG strings.

    Every image on every page is re-encoded to PNG and base64-encoded. An
    image that cannot be processed is logged as a warning and skipped. Any
    other failure (for example the file cannot be opened or parsed) is logged
    as an error, and whatever was collected up to that point is returned.
    """
    encoded_images = []
    try:
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            for pdf_page in reader.pages:
                for embedded in pdf_page.images:
                    try:
                        picture = Image.open(io.BytesIO(embedded.data))
                        # Re-encode to PNG in memory, then base64 the bytes
                        png_buffer = io.BytesIO()
                        picture.save(png_buffer, format="PNG")
                        png_bytes = png_buffer.getvalue()
                        encoded_images.append(base64.b64encode(png_bytes).decode())
                    except Exception as e:
                        logging.warning(f"Failed to process image: {str(e)}")
    except Exception as e:
        logging.error(f"Error extracting images: {str(e)}")
    return encoded_images
51
+
52
def extract_text_from_image(image_path):
    """Extract text from the image at *image_path* using OCR.

    Returns the string produced by ``pytesseract.image_to_string``. Any
    failure (unreadable file, OCR error, ...) is logged and an empty string
    is returned instead, so callers never have to handle an exception.
    """
    try:
        # The context manager closes the underlying file as soon as OCR is
        # done; an open handle would otherwise linger and can prevent the
        # uploaded file from being deleted afterwards.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image)
    except Exception as e:
        logging.error(f"Error extracting text from image: {str(e)}")
        return ""
61
+
62
# "ct" / "ecg" must match as whole words: a plain substring test for "ct"
# also hits "doctor", "fracture" and even "electrocardiogram", which made
# ECG reports come back as CT.
_CT_PATTERN = re.compile(r'\bct\b|computed tomography', re.IGNORECASE)
_ECG_PATTERN = re.compile(r'\becg\b|electrocardiogram', re.IGNORECASE)


def get_scan_type(text):
    """Determine the type of scan from the text content.

    Returns ``'CT'`` when the text mentions "CT" (as a standalone word) or
    "computed tomography", ``'ECG'`` when it mentions "ECG" (as a standalone
    word) or "electrocardiogram", and ``'Unknown'`` otherwise. Matching is
    case-insensitive and CT takes precedence when both are present.
    """
    if _CT_PATTERN.search(text):
        return 'CT'
    if _ECG_PATTERN.search(text):
        return 'ECG'
    return 'Unknown'
70
+
71
def _unique_upload_path(original_name):
    """Build a practically unique path in the upload folder for an upload.

    The client-supplied name is sanitised with ``secure_filename`` and
    prefixed with random hex so that concurrent requests, or a report and a
    companion image that share a name, do not overwrite or delete each
    other's file.
    """
    stored_name = f"{os.urandom(8).hex()}_{secure_filename(original_name)}"
    return os.path.join(app.config['UPLOAD_FOLDER'], stored_name)


def _extract_text_from_pdf(pdf_path):
    """Return the concatenated extractable text of every page of a PDF."""
    page_texts = []
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_num, page in enumerate(pdf_reader.pages):
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
            logging.debug(f"Extracted text from page {page_num}")
    return "".join(page_texts)


def _build_analysis_prompt(scan_type, image_count, combined_text):
    """Assemble the instruction text sent to the Gemini model."""
    return f"""
    You are a professional medical imaging specialist analyzing a {scan_type} scan report.
    Please provide a comprehensive analysis of this report just tell directly about following points no intro , including:
    1. Key findings and observations
    2. Any significant abnormalities or concerns
    3. Technical quality of the scan
    4. Recommendations for follow-up (if any)

    The report contains {image_count} images and the following text content:

    {combined_text}

    Please note any limitations in your analysis if the image quality or content is unclear.
    """


def _remove_uploads(paths):
    """Delete the given temporary upload files, logging instead of raising."""
    for path in paths:
        try:
            os.remove(path)
            logging.debug("Temporary file removed after processing.")
        except FileNotFoundError:
            # Nothing was written (e.g. the save itself failed).
            continue
        except OSError as e:
            logging.warning(f"Could not remove temporary file {path}: {str(e)}")


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the upload form and, on POST, analyse the uploaded report.

    GET renders ``index.html`` with ``result=None``.

    POST expects a ``file`` part (PDF or image) and optionally an ``image``
    part. Text is extracted from both (PDF text extraction or OCR), the scan
    type is guessed from it, the images embedded in a PDF are collected, and
    the combined text is sent to Gemini. The template then receives a
    ``result`` dict with the keys ``analysis``, ``analysis_html``,
    ``scan_type``, ``image_count`` and ``images``.

    Validation failures and processing errors are reported through
    ``flash`` followed by a redirect back to this page. Uploaded files are
    always deleted again before the response is returned.

    The Gemini API key is read from the ``GEMINI_API_KEY`` (or
    ``GOOGLE_API_KEY``) environment variable; credentials must not be
    committed to source control.
    """
    result = None
    if request.method != 'POST':
        return render_template("index.html", result=result)

    logging.debug("Received a POST request.")

    if 'file' not in request.files:
        logging.debug("No file part found in the request.")
        flash('No file selected', 'error')
        return redirect(request.url)

    file = request.files['file']

    if file.filename == '':
        logging.debug("No file selected for uploading.")
        flash('No file selected', 'error')
        return redirect(request.url)

    # content_length is None when the client sent no Content-Length header.
    content_length = request.content_length
    if content_length is not None and content_length > MAX_FILE_SIZE:
        flash(f'File size exceeds {MAX_FILE_SIZE // (1024 * 1024)}MB limit', 'error')
        return redirect(request.url)

    if not allowed_file(file.filename):
        flash('Only PDF and image files are allowed', 'error')
        return redirect(request.url)

    # Every path we may have written to; cleaned up in ``finally`` no matter
    # where processing stops.
    saved_paths = []
    try:
        api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise RuntimeError(
                "Gemini API key is not configured (set GEMINI_API_KEY)"
            )

        file_path = _unique_upload_path(file.filename)
        saved_paths.append(file_path)
        logging.debug(f"Saving file to {file_path}")
        file.save(file_path)

        # allowed_file() guaranteed a dot in the original name. Image uploads
        # are accepted too, and those cannot be parsed as PDF, so they go
        # through OCR instead.
        is_pdf = file.filename.rsplit('.', 1)[1].lower() == 'pdf'
        if is_pdf:
            text = _extract_text_from_pdf(file_path)
        else:
            text = extract_text_from_image(file_path)

        # Handle the optional companion image upload
        image_text = ""
        image_file = request.files.get('image')
        if (
            image_file is not None
            and image_file.filename != ''
            and allowed_file(image_file.filename)
        ):
            image_path = _unique_upload_path(image_file.filename)
            saved_paths.append(image_path)
            image_file.save(image_path)
            image_text = extract_text_from_image(image_path)
            logging.debug(f"Extracted text from image: {image_text}")

        # Combine text from the main upload and the companion image
        combined_text = text + " " + image_text

        # Determine scan type
        scan_type = get_scan_type(combined_text)

        # Embedded images only exist in PDF uploads
        images_data = extract_images_from_pdf(file_path) if is_pdf else []
        image_count = len(images_data)
        logging.debug(f"Extracted {image_count} images from PDF")

        prompt = _build_analysis_prompt(scan_type, image_count, combined_text)

        client = genai.Client(api_key=api_key)

        logging.debug("Calling Gemini API for content generation.")
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=prompt
        )
        logging.debug("Received response from Gemini API.")

        # response.text can be empty/None (e.g. a blocked response);
        # markdown.markdown() would fail on None.
        analysis_text = response.text or ""
        # NOTE(review): this HTML is derived from model output. If the
        # template renders it unescaped, it should be sanitised first --
        # confirm against index.html.
        analysis_html = markdown.markdown(analysis_text)

        result = {
            'analysis': analysis_text,       # original text (if needed)
            'analysis_html': analysis_html,  # HTML version for rendering
            'scan_type': scan_type,
            'image_count': image_count,
            'images': images_data
        }

    except Exception as e:
        logging.exception("Error processing file:")
        flash(f"Error processing file: {str(e)}", 'error')
        return redirect(request.url)
    finally:
        _remove_uploads(saved_paths)

    return render_template("index.html", result=result)
183
+
184
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) instead of
    # being switched on unconditionally.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true'})