rairo committed on
Commit
5fa6c98
·
verified ·
1 Parent(s): c26b237

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +317 -338
main.py CHANGED
@@ -2,372 +2,351 @@ from flask import Flask, request, jsonify
2
  import os
3
  import json
4
  import time
 
 
5
  from flask_cors import CORS
6
  from google import genai
7
- from google.genai import types
8
- from exa_py import Exa
9
- from linkup import LinkupClient
 
10
 
11
  app = Flask(__name__)
12
  CORS(app)
13
 
14
- # Environment variables
15
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
16
- if not GOOGLE_API_KEY:
17
- raise ValueError("GOOGLE_API_KEY environment variable is not set.")
18
-
19
- EXA_API_KEY = os.environ.get("EXA_API_KEY")
20
- if not EXA_API_KEY:
21
- raise ValueError("EXA_API_KEY environment variable is not set.")
22
-
23
- LINKUP_API_KEY = os.environ.get("LINKUP_API_KEY")
24
- if not LINKUP_API_KEY:
25
- raise ValueError("LINKUP_API_KEY environment variable is not set.")
26
-
27
- # Initialize clients
28
- exa = Exa(api_key=EXA_API_KEY)
29
- linkup_client = LinkupClient(api_key=LINKUP_API_KEY)
30
-
31
-
32
- def get_data(search_term):
33
- """
34
- Run the Linkup deep search for a given search term.
35
- If a rate-limit error occurs, wait 10 seconds and retry.
36
- """
37
- full_query = f"{search_term} grants funding opportunities"
38
-
39
- print("\n=== DEBUG: Start get_data() ===")
40
- print(f"Search Term: {search_term}")
41
- print(f"Full Query: {full_query}\n")
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  try:
44
- response = linkup_client.search(
45
- query=full_query,
46
- depth="deep",
47
- output_type="sourcedAnswer",
48
- include_images=False,
49
- )
50
 
51
- print("\n=== DEBUG: Raw result from linkup search ===")
52
- print(response)
53
- print("===========================================")
54
-
55
- # Extract the answer content from Linkup response
56
- content = ""
57
- if hasattr(response, 'answer'):
58
- content = response.answer
59
- elif isinstance(response, dict) and 'answer' in response:
60
- content = response['answer']
61
- else:
62
- content = str(response)
63
-
64
- # Process the content with Gemini AI to extract structured grant data
65
- structured_prompt = (
66
- f"Based on the following search results about {search_term} grants, "
67
- "extract and structure grant information with:\n"
68
- "- Grant name/title\n"
69
- "- Short summary \n"
70
- "- Funding organization\n"
71
- "- Grant value (numeric only)\n"
72
- "- Application deadline\n"
73
- "- Eligible countries\n"
74
- "- Sector/field\n"
75
- "- Eligibility criteria\n"
76
- "- link URL\n"
77
- "Return in JSON format with a 'grants' array.\n\n"
78
- f"Search results: {content}"
79
- )
80
 
81
- client = genai.Client(api_key=GOOGLE_API_KEY)
82
- gemini_response = client.models.generate_content(
83
- model="models/gemini-2.0-flash-lite",
84
- contents=f"{structured_prompt}, return the json string and nothing else"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  )
86
 
87
- gemini_text = gemini_response.text
88
- print(f"DEBUG: Gemini response: {gemini_text}")
89
-
90
- # Parse JSON from Gemini response
91
- try:
92
- # Try to find JSON in the response
93
- start_index = gemini_text.find('{')
94
- if start_index == -1:
95
- start_index = gemini_text.find('[')
96
-
97
- if start_index != -1:
98
- if gemini_text[start_index] == '{':
99
- end_index = gemini_text.rfind('}') + 1
100
- else:
101
- end_index = gemini_text.rfind(']') + 1
102
-
103
- json_string = gemini_text[start_index:end_index]
104
- result = json.loads(json_string)
105
-
106
- # Ensure result has grants array
107
- if isinstance(result, list):
108
- result = {"grants": result}
109
- elif isinstance(result, dict) and "grants" not in result:
110
- # If it's a dict but no grants key, assume it's a single grant
111
- result = {"grants": [result]}
112
- else:
113
- result = {"grants": []}
114
- except json.JSONDecodeError as je:
115
- print(f"ERROR: Failed to parse JSON from Gemini response: {je}")
116
- result = {"grants": []}
117
-
118
- if not result or "grants" not in result or not result["grants"]:
119
- print(f"DEBUG: No grants found for '{search_term}'.")
120
- return {"error": f"No results returned for '{search_term}'. Please try again with a different search term."}
121
-
122
- print("DEBUG: Grants found, returning results.")
123
- return result
124
-
125
  except Exception as e:
126
- err_str = str(e)
127
- print(f"ERROR: Exception occurred - {err_str}")
128
-
129
- # Check for rate limiting or similar errors
130
- if "rate" in err_str.lower() or "limit" in err_str.lower():
131
- print("DEBUG: Rate limit detected. Retrying in 10 seconds...")
132
- time.sleep(10)
133
- try:
134
- response = linkup_client.search(
135
- query=full_query,
136
- depth="deep",
137
- output_type="sourcedAnswer",
138
- include_images=False,
139
- )
140
-
141
- # Process retry response similar to above
142
- content = ""
143
- if hasattr(response, 'answer'):
144
- content = response.answer
145
- elif isinstance(response, dict) and 'answer' in response:
146
- content = response['answer']
147
- else:
148
- content = str(response)
149
-
150
- structured_prompt = (
151
- f"Based on the following search results about {search_term} grants, "
152
- "extract and structure grant information with:\n"
153
- "- Grant name/title\n"
154
- "- Short summary \n"
155
- "- Funding organization\n"
156
- "- Grant value (numeric only)\n"
157
- "- Application deadline\n"
158
- "- Eligible countries\n"
159
- "- Sector/field\n"
160
- "- Eligibility criteria\n"
161
- "- link URL\n"
162
- "Return in JSON format with a 'grants' array.\n\n"
163
- f"Search results: {content}"
164
- )
165
-
166
- client = genai.Client(api_key=GOOGLE_API_KEY)
167
- gemini_response = client.models.generate_content(
168
- model="models/gemini-2.0-flash-lite",
169
- contents=f"{structured_prompt}, return the json string and nothing else"
170
- )
171
-
172
- gemini_text = gemini_response.text
173
-
174
- try:
175
- start_index = gemini_text.find('{')
176
- if start_index == -1:
177
- start_index = gemini_text.find('[')
178
-
179
- if start_index != -1:
180
- if gemini_text[start_index] == '{':
181
- end_index = gemini_text.rfind('}') + 1
182
- else:
183
- end_index = gemini_text.rfind(']') + 1
184
-
185
- json_string = gemini_text[start_index:end_index]
186
- result = json.loads(json_string)
187
-
188
- if isinstance(result, list):
189
- result = {"grants": result}
190
- elif isinstance(result, dict) and "grants" not in result:
191
- result = {"grants": [result]}
192
- else:
193
- result = {"grants": []}
194
- except json.JSONDecodeError:
195
- result = {"grants": []}
196
-
197
- if not result or "grants" not in result or not result["grants"]:
198
- print(f"DEBUG: No grants found after retry for '{search_term}'.")
199
- return {"error": f"No results returned for '{search_term}' after retry. Please try again with a different search term."}
200
-
201
- print("DEBUG: Grants found on retry, returning results.")
202
- return result
203
-
204
- except Exception as e2:
205
- print(f"ERROR: Retry failed - {str(e2)}")
206
- return {"error": f"Retry failed for '{search_term}': {str(e2)}. Please try again later."}
207
- else:
208
- return {"error": f"An error occurred for '{search_term}': {str(e)}. Please try again."}
209
-
210
-
211
- def process_multiple_search_terms(search_terms):
212
- """
213
- Process multiple search terms and aggregate results.
214
- Returns a dictionary with a 'grants' key containing combined results.
215
- """
216
- all_data = {"grants": []}
217
- for term in search_terms:
218
- term = term.strip()
219
- if not term:
220
- continue
221
- result = get_data(term)
222
- if result and result.get("grants"):
223
- all_data["grants"].extend(result["grants"])
224
- return all_data
225
-
226
-
227
- @app.route("/scrape", methods=["POST"])
228
- def scrape():
229
- """
230
- Endpoint to scrape grant opportunities using search terms.
231
- Expects a JSON body with the key 'search_terms' (a string with newline-separated search terms
232
- or a list of strings). Returns JSON with the aggregated results.
233
- """
234
- data = request.get_json()
235
- if not data or "search_terms" not in data:
236
- return jsonify({"error": "Request must include 'search_terms' key."}), 400
237
-
238
- search_terms = data["search_terms"]
239
- if isinstance(search_terms, str):
240
- search_terms = [s.strip() for s in search_terms.split("\n") if s.strip()]
241
- elif not isinstance(search_terms, list):
242
- return jsonify({"error": "'search_terms' must be a string or list of strings."}), 400
243
-
244
- if not search_terms:
245
- return jsonify({"error": "No valid search terms provided."}), 400
246
-
247
- result = process_multiple_search_terms(search_terms)
248
- return jsonify(result), 200
249
-
250
-
251
- def get_data_from_url(url):
252
- """
253
- Scrape the provided URL using Exa API.
254
- Extract grant data using Gemini AI.
255
- """
256
- print(f"\n=== DEBUG: Start get_data_from_url() ===")
257
- print(f"URL: {url}")
258
 
 
 
259
  try:
260
- # Use Exa to get content from URL
261
- result = exa.get_contents(
262
- [url],
263
- text=True
 
 
 
 
 
 
 
 
 
 
 
264
  )
265
 
266
- print("\n=== DEBUG: Raw result from Exa ===")
267
- print(result)
268
- print("=====================================")
269
-
270
- # Extract text content from Exa response
271
- page_content = ""
272
- if hasattr(result, 'results') and result.results:
273
- page_content = result.results[0].text if hasattr(result.results[0], 'text') else str(result.results[0])
274
- elif isinstance(result, dict) and 'results' in result and result['results']:
275
- page_content = result['results'][0].get('text', str(result['results'][0]))
276
- else:
277
- page_content = str(result)
278
-
279
- if not page_content:
280
- print("ERROR: No content extracted from URL")
281
- return {}
 
 
282
 
283
- print(f"DEBUG: Extracted content length: {len(page_content)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
- # Process content with Gemini AI
286
- full_prompt = (
287
- "Extract the following grant data from the provided web content. "
288
- "- Grant name/title\n"
289
- "- Short summary\n"
290
- "- Funding organization\n"
291
- "- Grant value (numeric only)\n"
292
- "- Application deadline\n"
293
- "- Eligible countries\n"
294
- "- Sector/field\n"
295
- "- Eligibility criteria\n"
296
- "Return in JSON format with a 'grants' array.\n\n"
297
- f"Web content: {page_content[:10000]}" # Limit content to avoid token limits
298
- )
299
 
300
- client = genai.Client(api_key=GOOGLE_API_KEY)
301
- gemini_response = client.models.generate_content(
302
- model="models/gemini-2.0-flash-lite",
303
- contents=f"{full_prompt}, return the json string and nothing else"
 
 
 
304
  )
305
 
306
- response_text = gemini_response.text
307
- print(f"DEBUG: Gemini response: {response_text}")
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- # Extract JSON output from Gemini
310
- try:
311
- start_index = response_text.find('[')
312
- if start_index == -1:
313
- start_index = response_text.find('{')
314
-
315
- if start_index != -1:
316
- if response_text[start_index] == '[':
317
- end_index = response_text.rfind(']') + 1
318
- else:
319
- end_index = response_text.rfind('}') + 1
 
 
 
 
 
 
 
 
320
 
321
- json_string = response_text[start_index:end_index]
322
- parsed_result = json.loads(json_string)
 
323
 
324
- # Ensure JSON is wrapped correctly
325
- if isinstance(parsed_result, list):
326
- parsed_result = {"grants": parsed_result}
327
- elif isinstance(parsed_result, dict) and "grants" not in parsed_result:
328
- # If it's a dict but no grants key, assume it's a single grant
329
- parsed_result = {"grants": [parsed_result]}
330
- else:
331
- parsed_result = {"grants": []}
332
 
333
- except Exception as parse_error:
334
- print(f"Error parsing JSON from Gemini model response: {parse_error}")
335
- print(f"Response: {response_text}")
336
- return {}
337
-
338
- if not parsed_result.get("grants"):
339
- print("No grant opportunities found in the scraped URL.")
340
- return {}
341
-
342
- print(f"DEBUG: Found {len(parsed_result['grants'])} grants")
343
- if parsed_result['grants']:
344
- print(f"First grant opportunity: {parsed_result['grants'][0]}")
345
 
346
- return parsed_result
347
-
348
  except Exception as e:
349
- print(f"ERROR: Exception in get_data_from_url: {str(e)}")
350
- return {}
351
-
352
-
353
- @app.route("/scrape_url", methods=["POST"])
354
- def scrape_url():
355
- """
356
- Endpoint to scrape a provided URL for grant opportunities.
357
- Expects a JSON body with the key 'url'.
358
- Returns the scraped and processed grant data in JSON format.
359
- """
360
- data = request.get_json()
361
- if not data or "url" not in data:
362
- return jsonify({"error": "Request must include 'url' key."}), 400
363
-
364
- url = data["url"]
365
- result = get_data_from_url(url)
366
- if not result:
367
- return jsonify({"error": "Failed to scrape URL or no grants found."}), 500
368
-
369
- return jsonify(result), 200
370
-
 
 
 
 
 
 
 
 
 
371
 
372
  if __name__ == "__main__":
373
  app.run(debug=True, host="0.0.0.0", port=7860)
 
2
  import os
3
  import json
4
  import time
5
+ import base64
6
+ import uuid
7
  from flask_cors import CORS
8
  from google import genai
9
+ from PIL import Image
10
+ import io
11
+ from typing import List, Dict, Any
12
+ import logging
13
 
14
  app = Flask(__name__)
15
  CORS(app)
16
 
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Configure GenAI
22
+ GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
23
+ if not GOOGLE_API_KEY:
24
+ raise ValueError("GOOGLE_API_KEY environment variable is required")
25
+
26
+ client = genai.Client(api_key=GOOGLE_API_KEY)
27
+
28
+ # In-memory storage for multi-part receipts (use Redis/database in production)
29
+ receipt_sessions = {}
30
+
31
+ RECEIPT_ANALYSIS_PROMPT = """
32
+ Analyze this receipt image and extract the following information in JSON format:
33
+ - items: List of items with their details
34
+ - receipt_date: Date from the receipt (YYYY-MM-DD format)
35
+ - total_amount: Total amount from receipt
36
+ - store_name: Name of the store/merchant
37
+
38
+ For each item, provide:
39
+ - name: Item name/description
40
+ - quantity: Quantity purchased (default to 1 if not specified)
41
+ - unit_price: Price per unit
42
+ - total_price: Total price for this item
43
+ - category: Categorize as either "stock" (inventory items, products for resale, raw materials) or "expense" (office supplies, utilities, services, consumables)
44
+
45
+ Use your best judgment to categorize items:
46
+ - "stock": Products intended for sale, raw materials, inventory items
47
+ - "expense": Office supplies, utilities, services, maintenance, consumables
48
+
49
+ Return only valid JSON without any markdown formatting or code blocks.
50
+ """
51
+
52
+ MULTI_PART_ANALYSIS_PROMPT = """
53
+ Analyze these multiple images of the same receipt and extract all information in JSON format:
54
+ - items: Complete list of all items from all images
55
+ - receipt_date: Date from the receipt (YYYY-MM-DD format)
56
+ - total_amount: Total amount from receipt
57
+ - store_name: Name of the store/merchant
58
+
59
+ For each item, provide:
60
+ - name: Item name/description
61
+ - quantity: Quantity purchased (default to 1 if not specified)
62
+ - unit_price: Price per unit
63
+ - total_price: Total price for this item
64
+ - category: Categorize as either "stock" (inventory items, products for resale, raw materials) or "expense" (office supplies, utilities, services, consumables)
65
+
66
+ Combine information from all images to create a complete receipt analysis.
67
+ Return only valid JSON without any markdown formatting or code blocks.
68
+ """
69
+
70
def encode_image_to_base64(image_data):
    """Return *image_data* as base64 text.

    Args:
        image_data: Raw image bytes (any bytes-like object), or a string that
            is assumed to already contain base64 text. A string in data-URL
            form (``data:<mime>;base64,<payload>``) is reduced to its payload.

    Returns:
        The base64-encoded image as a ``str``.

    Raises:
        TypeError: If *image_data* is neither a string nor bytes-like. The
            failure is logged before being re-raised.
    """
    if isinstance(image_data, str):
        # A data-URL header is not base64 (it contains ':' and ';'), so it
        # must be removed before the payload is handed on.
        if image_data.startswith('data:') and ',' in image_data:
            return image_data.split(',', 1)[1]
        # Otherwise trust the caller that the string is already base64.
        return image_data

    try:
        return base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
def _parse_model_json(reply_text):
    """Parse the model's reply as JSON, tolerating a Markdown code fence."""
    # The reply text may be missing entirely (e.g. no text part came back);
    # report that as a parse failure instead of crashing on ``None.strip()``.
    if not reply_text or not reply_text.strip():
        logger.error("JSON parsing error: empty response")
        raise ValueError("Failed to parse AI response as JSON: empty response")

    cleaned = reply_text.strip()

    # Despite the prompt, models sometimes wrap the JSON in ``` fences, with
    # or without a language tag in either case.
    if cleaned.startswith('```'):
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == 'json':
            cleaned = cleaned[4:]
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]

    try:
        return json.loads(cleaned.strip())
    except json.JSONDecodeError as e:
        logger.error(f"JSON parsing error: {str(e)}")
        raise ValueError(f"Failed to parse AI response as JSON: {str(e)}") from e


def process_single_receipt(image_data, content_type="image/jpeg"):
    """Send one receipt image to the model and return the parsed JSON result.

    Args:
        image_data: Image bytes, or a base64 string, as accepted by
            ``encode_image_to_base64``.
        content_type: MIME type sent alongside the image.

    Returns:
        The data decoded from the model's JSON reply.

    Raises:
        ValueError: If the reply is empty or is not valid JSON.
        Exception: Any error raised while encoding the image or calling the
            model is logged and re-raised unchanged.
    """
    try:
        base64_image = encode_image_to_base64(image_data)

        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=[
                {
                    'parts': [
                        {'text': RECEIPT_ANALYSIS_PROMPT},
                        {
                            'inline_data': {
                                'mime_type': content_type,
                                'data': base64_image
                            }
                        }
                    ]
                }
            ]
        )
        reply_text = response.text
    except Exception as e:
        logger.error(f"Error processing receipt: {str(e)}")
        raise

    # Parsing happens outside the try so a parse failure is logged only once.
    return _parse_model_json(reply_text)


def process_multi_part_receipt(images_data, content_types):
    """Send several images of one receipt to the model in a single request.

    Args:
        images_data: Sequence of images (bytes or base64 strings).
        content_types: MIME types, paired positionally with *images_data*.
            If the two sequences differ in length, the extra entries of the
            longer one are ignored.

    Returns:
        The data decoded from the model's JSON reply.

    Raises:
        ValueError: If the reply is empty or is not valid JSON.
        Exception: Any error raised while encoding the images or calling the
            model is logged and re-raised unchanged.
    """
    try:
        parts = [{'text': MULTI_PART_ANALYSIS_PROMPT}]
        parts.extend(
            {
                'inline_data': {
                    'mime_type': content_type,
                    'data': encode_image_to_base64(image_data)
                }
            }
            for image_data, content_type in zip(images_data, content_types)
        )

        # NOTE(review): this path uses a different model name than
        # process_single_receipt ('gemini-2.0-flash') - confirm it is intended.
        response = client.models.generate_content(
            model='gemini-1.5-flash',
            contents=[{'parts': parts}]
        )
        reply_text = response.text
    except Exception as e:
        logger.error(f"Error processing multi-part receipt: {str(e)}")
        raise

    # Parsing happens outside the try so a parse failure is logged only once.
    return _parse_model_json(reply_text)
163
 
164
@app.route('/process-receipt', methods=['POST'])
def process_receipt():
    """Analyze a single receipt image uploaded in the ``image`` form field.

    Responses:
        200: ``{'success': True, 'data': <analysis>, 'message': ...}``.
        400: The file is missing, has no filename, is empty, or the model
            reply could not be parsed (``ValueError``).
        500: Any other failure; details are logged, not returned.
    """
    try:
        upload = request.files.get('image')
        if upload is None:
            return jsonify({'error': 'No image file provided'}), 400

        if upload.filename == '':
            return jsonify({'error': 'No image file selected'}), 400

        image_data = upload.read()
        # A zero-byte upload is a client error; reject it here rather than
        # spending a model call that can only fail.
        if not image_data:
            return jsonify({'error': 'Uploaded image file is empty'}), 400

        content_type = upload.content_type or 'image/jpeg'

        result = process_single_receipt(image_data, content_type)

        return jsonify({
            'success': True,
            'data': result,
            'message': 'Receipt processed successfully'
        })

    except ValueError as e:
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
193
+
194
@app.route('/start-receipt-session', methods=['POST'])
def start_receipt_session():
    """Create an empty multi-part receipt session and return its ID."""
    new_session_id = str(uuid.uuid4())

    # Images and their MIME types are kept in two parallel lists.
    session_record = dict(images=[], content_types=[], created_at=time.time())
    receipt_sessions[new_session_id] = session_record

    payload = {
        'success': True,
        'session_id': new_session_id,
        'message': 'Receipt session started'
    }
    return jsonify(payload)
209
+
210
@app.route('/add-receipt-part/<session_id>', methods=['POST'])
def add_receipt_part(session_id):
    """Append one uploaded image (form field ``image``) to a receipt session.

    Args:
        session_id: Session identifier taken from the URL.

    Responses:
        200: ``{'success': True, 'parts_count': <int>, 'message': ...}``.
        400: The file is missing, has no filename, or is empty.
        404: Unknown session ID.
        500: Any other failure; details are logged, not returned.
    """
    try:
        session_data = receipt_sessions.get(session_id)
        if session_data is None:
            return jsonify({'error': 'Invalid session ID'}), 404

        upload = request.files.get('image')
        if upload is None:
            return jsonify({'error': 'No image file provided'}), 400

        if upload.filename == '':
            return jsonify({'error': 'No image file selected'}), 400

        image_data = upload.read()
        # Storing a zero-byte part would make the whole session fail later at
        # processing time, so reject it at upload time.
        if not image_data:
            return jsonify({'error': 'Uploaded image file is empty'}), 400

        content_type = upload.content_type or 'image/jpeg'

        session_data['images'].append(image_data)
        session_data['content_types'].append(content_type)

        return jsonify({
            'success': True,
            'parts_count': len(session_data['images']),
            'message': 'Receipt part added successfully'
        })

    except Exception as e:
        logger.error(f"Error adding receipt part: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
240
 
241
@app.route('/process-receipt-session/<session_id>', methods=['POST'])
def process_receipt_session(session_id):
    """Analyze all images collected in a receipt session, then discard it.

    The session is removed only after a successful analysis; on failure it is
    kept in ``receipt_sessions``.

    Args:
        session_id: Session identifier taken from the URL.

    Responses:
        200: ``{'success': True, 'data': <analysis>, 'message': ...}``.
        400: The session holds no images, or the model reply could not be
            parsed (``ValueError``).
        404: Unknown session ID.
        500: Any other failure; details are logged, not returned.
    """
    try:
        session_data = receipt_sessions.get(session_id)
        if session_data is None:
            return jsonify({'error': 'Invalid session ID'}), 404

        if not session_data['images']:
            return jsonify({'error': 'No images in session'}), 400

        result = process_multi_part_receipt(
            session_data['images'],
            session_data['content_types']
        )

        # The session may already be gone (for example removed by
        # /cleanup-sessions while the model call was running). pop() keeps a
        # successful analysis from ending in a KeyError and a 500.
        receipt_sessions.pop(session_id, None)

        return jsonify({
            'success': True,
            'data': result,
            'message': 'Multi-part receipt processed successfully'
        })

    except ValueError as e:
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        logger.error(f"Error processing receipt session: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
273
 
274
@app.route('/bulk-process-receipts', methods=['POST'])
def bulk_process_receipts():
    """Analyze several independent receipts uploaded under the ``images`` field.

    Each file is processed on its own; a failure for one file is recorded in
    ``errors`` and does not stop the remaining files.

    Responses:
        200: ``success``, ``processed_count``, ``error_count``, ``results``,
            ``errors`` and ``message``. Returned even when individual files
            failed.
        400: No files were supplied under ``images``.
        500: A failure outside the per-file loop; details are logged.
    """
    try:
        if 'images' not in request.files:
            return jsonify({'error': 'No image files provided'}), 400

        files = request.files.getlist('images')
        if not files:
            return jsonify({'error': 'No image files selected'}), 400

        results = []
        errors = []

        # Positions are 1-based because they are shown to the client.
        for position, file in enumerate(files, start=1):
            try:
                if file.filename == '':
                    errors.append(f"File {position}: No filename")
                    continue

                image_data = file.read()
                # Report a zero-byte upload directly instead of sending it to
                # the model and relaying whatever error comes back.
                if not image_data:
                    errors.append(f"File {position} ({file.filename}): Empty file")
                    continue

                content_type = file.content_type or 'image/jpeg'

                result = process_single_receipt(image_data, content_type)
                results.append({
                    'file_index': position,
                    'filename': file.filename,
                    'data': result
                })

            except Exception as e:
                # Log with the file's identity so a failure can be traced to
                # a specific upload.
                logger.error(f"Error processing file {position} ({file.filename}): {str(e)}")
                errors.append(f"File {position} ({file.filename}): {str(e)}")

        return jsonify({
            'success': True,
            'processed_count': len(results),
            'error_count': len(errors),
            'results': results,
            'errors': errors,
            'message': f'Bulk processing completed. {len(results)} successful, {len(errors)} errors.'
        })

    except Exception as e:
        logger.error(f"Error in bulk processing: {str(e)}")
        return jsonify({'error': 'Internal server error'}), 500
321
+
322
@app.route('/health', methods=['GET'])
def health_check():
    """Report service status, the current time and the number of open sessions."""
    status_report = {
        'status': 'healthy',
        'timestamp': time.time(),
        'active_sessions': len(receipt_sessions)
    }
    return jsonify(status_report)
330
+
331
@app.route('/cleanup-sessions', methods=['POST'])
def cleanup_old_sessions():
    """Drop receipt sessions created more than one hour ago and report counts."""
    expiry_threshold = time.time() - 3600  # 1 hour

    # Collect the IDs first; deleting while iterating the dict is not allowed.
    expired_ids = []
    for sid, record in receipt_sessions.items():
        if record['created_at'] < expiry_threshold:
            expired_ids.append(sid)

    for sid in expired_ids:
        del receipt_sessions[sid]

    summary = {
        'success': True,
        'cleaned_sessions': len(expired_ids),
        'remaining_sessions': len(receipt_sessions)
    }
    return jsonify(summary)
350
 
if __name__ == "__main__":
    # Flask's debug mode enables the interactive Werkzeug debugger, which can
    # execute arbitrary code from the browser. The server listens on all
    # interfaces, so debug stays off unless FLASK_DEBUG is set explicitly
    # (for local development only).
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)