quantumbit committed on
Commit
3810d8e
·
verified ·
1 Parent(s): 492cbf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +254 -245
app.py CHANGED
@@ -1,245 +1,254 @@
1
- """
2
- FastAPI Server for Invoice Information Extractor
3
- Provides REST API for invoice processing
4
- """
5
-
6
- from fastapi import FastAPI, File, UploadFile, HTTPException, Form
7
- from fastapi.responses import JSONResponse
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from contextlib import asynccontextmanager
10
- from typing import Optional
11
- import tempfile
12
- import os
13
- import shutil
14
-
15
- from config import API_TITLE, API_DESCRIPTION, API_VERSION
16
- from model_manager import model_manager
17
- from inference import InferenceProcessor
18
-
19
-
20
- @asynccontextmanager
21
- async def lifespan(app: FastAPI):
22
- """Lifecycle manager - loads models on startup"""
23
- print("🚀 Starting Invoice Information Extractor API...")
24
- print("=" * 60)
25
-
26
- # Load models on startup
27
- try:
28
- model_manager.load_models()
29
- print("=" * 60)
30
- print("✅ API is ready to accept requests!")
31
- print("=" * 60)
32
- except Exception as e:
33
- print(f"❌ Failed to load models: {str(e)}")
34
- raise
35
-
36
- yield
37
-
38
- # Cleanup on shutdown
39
- print("🛑 Shutting down API...")
40
-
41
-
42
- # Initialize FastAPI app
43
- app = FastAPI(
44
- title=API_TITLE,
45
- description=API_DESCRIPTION,
46
- version=API_VERSION,
47
- lifespan=lifespan
48
- )
49
-
50
- # Add CORS middleware
51
- app.add_middleware(
52
- CORSMiddleware,
53
- allow_origins=["*"],
54
- allow_credentials=True,
55
- allow_methods=["*"],
56
- allow_headers=["*"],
57
- )
58
-
59
-
60
- @app.get("/")
61
- async def root():
62
- """Root endpoint - API information"""
63
- return {
64
- "name": API_TITLE,
65
- "version": API_VERSION,
66
- "status": "running",
67
- "models_loaded": model_manager.is_loaded(),
68
- "endpoints": {
69
- "health": "/health",
70
- "extract": "/extract (POST)",
71
- "docs": "/docs"
72
- }
73
- }
74
-
75
-
76
- @app.get("/health")
77
- async def health_check():
78
- """Health check endpoint"""
79
- return {
80
- "status": "healthy",
81
- "models_loaded": model_manager.is_loaded()
82
- }
83
-
84
-
85
- @app.post("/extract")
86
- async def extract_invoice(
87
- file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
88
- doc_id: Optional[str] = Form(None, description="Optional document identifier")
89
- ):
90
- """
91
- Extract information from invoice image
92
-
93
- **Parameters:**
94
- - **file**: Invoice image file (required)
95
- - **doc_id**: Optional document identifier (auto-generated from filename if not provided)
96
-
97
- **Returns:**
98
- - JSON with extracted fields, confidence scores, and metadata
99
-
100
- **Example Response:**
101
- ```json
102
- {
103
- "doc_id": "invoice_001",
104
- "fields": {
105
- "dealer_name": "ABC Tractors Pvt Ltd",
106
- "model_name": "Mahindra 575 DI",
107
- "horse_power": 50,
108
- "asset_cost": 525000,
109
- "signature": {"present": true, "bbox": [100, 200, 300, 250]},
110
- "stamp": {"present": true, "bbox": [400, 500, 500, 550]}
111
- },
112
- "confidence": 0.89,
113
- "processing_time_sec": 3.8,
114
- "cost_estimate_usd": 0.000528
115
- }
116
- ```
117
- """
118
-
119
- # Validate file type
120
- if not file.content_type.startswith("image/"):
121
- raise HTTPException(
122
- status_code=400,
123
- detail="File must be an image (JPG, PNG, JPEG)"
124
- )
125
-
126
- # Check if models are loaded
127
- if not model_manager.is_loaded():
128
- raise HTTPException(
129
- status_code=503,
130
- detail="Models not loaded. Please wait for server initialization."
131
- )
132
-
133
- # Save uploaded file to temporary location
134
- temp_file = None
135
- try:
136
- # Create temporary file
137
- suffix = os.path.splitext(file.filename)[1]
138
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp:
139
- temp_file = temp.name
140
- # Write uploaded file content
141
- shutil.copyfileobj(file.file, temp)
142
-
143
- # Use filename as doc_id if not provided
144
- if doc_id is None:
145
- doc_id = os.path.splitext(file.filename)[0]
146
-
147
- # Process invoice
148
- result = InferenceProcessor.process_invoice(temp_file, doc_id)
149
-
150
- return JSONResponse(content=result)
151
-
152
- except Exception as e:
153
- raise HTTPException(
154
- status_code=500,
155
- detail=f"Error processing invoice: {str(e)}"
156
- )
157
-
158
- finally:
159
- # Clean up temporary file
160
- if temp_file and os.path.exists(temp_file):
161
- try:
162
- os.unlink(temp_file)
163
- except:
164
- pass
165
-
166
- # Close uploaded file
167
- file.file.close()
168
-
169
-
170
- @app.post("/extract_batch")
171
- async def extract_batch(
172
- files: list[UploadFile] = File(..., description="Multiple invoice images")
173
- ):
174
- """
175
- Extract information from multiple invoice images
176
-
177
- **Parameters:**
178
- - **files**: List of invoice image files
179
-
180
- **Returns:**
181
- - JSON array with results for each invoice
182
- """
183
-
184
- if not model_manager.is_loaded():
185
- raise HTTPException(
186
- status_code=503,
187
- detail="Models not loaded. Please wait for server initialization."
188
- )
189
-
190
- results = []
191
- temp_files = []
192
-
193
- try:
194
- for file in files:
195
- # Validate file type
196
- if not file.content_type.startswith("image/"):
197
- results.append({
198
- "filename": file.filename,
199
- "error": "File must be an image"
200
- })
201
- continue
202
-
203
- # Save to temp file
204
- suffix = os.path.splitext(file.filename)[1]
205
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp:
206
- temp_file = temp.name
207
- temp_files.append(temp_file)
208
- shutil.copyfileobj(file.file, temp)
209
-
210
- # Process
211
- try:
212
- doc_id = os.path.splitext(file.filename)[0]
213
- result = InferenceProcessor.process_invoice(temp_file, doc_id)
214
- results.append(result)
215
- except Exception as e:
216
- results.append({
217
- "filename": file.filename,
218
- "error": str(e)
219
- })
220
-
221
- return JSONResponse(content={"results": results})
222
-
223
- finally:
224
- # Cleanup
225
- for temp_file in temp_files:
226
- if os.path.exists(temp_file):
227
- try:
228
- os.unlink(temp_file)
229
- except:
230
- pass
231
-
232
- for file in files:
233
- file.file.close()
234
-
235
-
236
- if __name__ == "__main__":
237
- import uvicorn
238
-
239
- # Run server
240
- uvicorn.run(
241
- "app:app",
242
- host="0.0.0.0",
243
- port=7860, # Hugging Face Spaces default port
244
- reload=False
245
- )
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI Server for Invoice Information Extractor
3
+ Provides REST API for invoice processing
4
+ """
5
+
6
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form
7
+ from fastapi.responses import JSONResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from contextlib import asynccontextmanager
10
+ from typing import Optional
11
+ import tempfile
12
+ import os
13
+ import shutil
14
+
15
+ from config import API_TITLE, API_DESCRIPTION, API_VERSION
16
+ from model_manager import model_manager
17
+ from inference import InferenceProcessor
18
+
19
+
20
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Runs ``model_manager.load_models()`` before the server starts serving and
    prints a shutdown notice once the server stops. A failure while loading is
    reported on stdout and re-raised, so startup is aborted instead of running
    without models.
    """
    rule = "=" * 60

    print("🚀 Starting Invoice Information Extractor API...")
    print(rule)

    # Startup phase: everything before the ``yield`` runs once, before requests.
    try:
        model_manager.load_models()
        print(rule)
        print("✅ API is ready to accept requests!")
        print(rule)
    except Exception as e:
        print(f"❌ Failed to load models: {str(e)}")
        raise

    yield

    # Shutdown phase: runs after the server has stopped serving.
    print("🛑 Shutting down API...")
40
+
41
+
42
# Application object; title/description/version come from config and the
# lifespan hook handles model loading at startup.
app = FastAPI(
    lifespan=lifespan,
    title=API_TITLE,
    description=API_DESCRIPTION,
    version=API_VERSION,
)

# CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended before exposing anything that relies
# on cookies or other browser credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
58
+
59
+
60
@app.get("/")
async def root():
    """Root endpoint - API information"""
    # Static map of the routes advertised to clients.
    endpoints = {
        "health": "/health",
        "extract": "/extract (POST)",
        "docs": "/docs",
    }

    # Service identity plus the current model-loading state.
    info = {
        "name": API_TITLE,
        "version": API_VERSION,
        "status": "running",
        "models_loaded": model_manager.is_loaded(),
        "endpoints": endpoints,
    }
    return info
74
+
75
+
76
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # "status" is always "healthy" while the process answers; model readiness
    # is reported separately through "models_loaded".
    loaded = model_manager.is_loaded()
    return {"status": "healthy", "models_loaded": loaded}
83
+
84
+
85
@app.post("/extract")
async def extract_invoice(
    file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
    doc_id: Optional[str] = Form(None, description="Optional document identifier")
):
    """
    Extract information from invoice image

    **Parameters:**
    - **file**: Invoice image file (required)
    - **doc_id**: Optional document identifier (auto-generated from filename if not provided)

    **Returns:**
    - JSON with extracted fields, confidence scores, and metadata

    **Example Response:**
    ```json
    {
        "doc_id": "invoice_001",
        "fields": {
            "dealer_name": "ABC Tractors Pvt Ltd",
            "model_name": "Mahindra 575 DI",
            "horse_power": 50,
            "asset_cost": 525000,
            "signature": {"present": true, "bbox": [100, 200, 300, 250]},
            "stamp": {"present": true, "bbox": [400, 500, 500, 550]}
        },
        "confidence": 0.89,
        "processing_time_sec": 3.8,
        "cost_estimate_usd": 0.000528
    }
    ```
    """
    # Error responses produced here:
    #   400 - declared content type is not image/*, or the extension is not an
    #         accepted image extension
    #   503 - models have not finished loading
    #   500 - anything raised while saving or processing the upload

    # An upload can arrive without a filename; normalise to "" once so the
    # path helpers below never receive None.
    filename = file.filename or ""
    stem, ext = os.path.splitext(filename)

    # Validate file type (only when the client declared one)
    if file.content_type and not file.content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="File must be an image (JPG, PNG, JPEG)"
        )

    # Validate file extension as fallback
    allowed_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    if filename and ext.lower() not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail="File must be an image (JPG, PNG, JPEG, GIF, BMP, TIFF, WEBP)"
        )

    # Check if models are loaded
    if not model_manager.is_loaded():
        raise HTTPException(
            status_code=503,
            detail="Models not loaded. Please wait for server initialization."
        )

    # Save uploaded file to temporary location
    temp_file = None
    try:
        # delete=False: the file has to outlive the ``with`` block because it
        # is handed to the processor by path; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp:
            temp_file = temp.name
            shutil.copyfileobj(file.file, temp)

        # Use filename (without extension) as doc_id if not provided
        if doc_id is None:
            doc_id = stem

        # Process invoice
        result = InferenceProcessor.process_invoice(temp_file, doc_id)

        return JSONResponse(content=result, media_type="application/json; charset=utf-8")

    except Exception as e:
        # Keep the original exception attached as the cause for server logs.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing invoice: {str(e)}"
        ) from e

    finally:
        # Best-effort removal of the temporary file; a failed or redundant
        # delete must not mask the real response or error.
        if temp_file:
            try:
                os.unlink(temp_file)
            except OSError:
                pass

        # Close uploaded file
        file.file.close()
177
+
178
+
179
@app.post("/extract_batch")
async def extract_batch(
    files: list[UploadFile] = File(..., description="Multiple invoice images")
):
    """
    Extract information from multiple invoice images

    **Parameters:**
    - **files**: List of invoice image files

    **Returns:**
    - JSON array with results for each invoice
    """
    # Failures are reported per file as {"filename", "error"} entries in
    # "results"; only the models-not-loaded check rejects the whole request.

    if not model_manager.is_loaded():
        raise HTTPException(
            status_code=503,
            detail="Models not loaded. Please wait for server initialization."
        )

    results = []
    temp_files = []

    try:
        for file in files:
            # Validate file type. content_type may be absent, so it is only
            # checked when the client declared one — same rule as /extract.
            if file.content_type and not file.content_type.startswith("image/"):
                results.append({
                    "filename": file.filename,
                    "error": "File must be an image"
                })
                continue

            # Saving and processing share one try so that a failure on a
            # single upload becomes an error entry instead of aborting the
            # rest of the batch.
            try:
                # An upload can arrive without a filename; fall back to "".
                doc_id, suffix = os.path.splitext(file.filename or "")

                # delete=False: the processor receives the path after the
                # ``with`` block closes; removal happens in ``finally``.
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp:
                    temp_file = temp.name
                    # Register before writing so a partial file is cleaned up.
                    temp_files.append(temp_file)
                    shutil.copyfileobj(file.file, temp)

                result = InferenceProcessor.process_invoice(temp_file, doc_id)
                results.append(result)
            except Exception as e:
                results.append({
                    "filename": file.filename,
                    "error": str(e)
                })

        return JSONResponse(content={"results": results}, media_type="application/json; charset=utf-8")

    finally:
        # Best-effort cleanup of every temporary file created above.
        for temp_file in temp_files:
            try:
                os.unlink(temp_file)
            except OSError:
                pass

        for file in files:
            file.file.close()
243
+
244
+
245
if __name__ == "__main__":
    import uvicorn

    # Launch settings for running this module directly as a script.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,  # Hugging Face Spaces default port
        "reload": False,
    }

    # Run server
    uvicorn.run("app:app", **server_options)