Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,13 +6,15 @@ import time
|
|
| 6 |
import datetime
|
| 7 |
import shutil
|
| 8 |
import tempfile
|
|
|
|
| 9 |
from typing import List, Dict, Optional, Tuple
|
| 10 |
from collections import deque
|
| 11 |
from pathlib import Path
|
| 12 |
|
| 13 |
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks
|
| 14 |
from fastapi.middleware.cors import CORSMiddleware
|
| 15 |
-
from fastapi.responses import JSONResponse
|
|
|
|
| 16 |
import fitz # PyMuPDF
|
| 17 |
|
| 18 |
# Google Gemini - optional import
|
|
@@ -22,10 +24,13 @@ try:
|
|
| 22 |
GEMINI_AVAILABLE = True
|
| 23 |
except ImportError:
|
| 24 |
GEMINI_AVAILABLE = False
|
| 25 |
-
print("Warning: google-generativeai not installed.
|
| 26 |
|
| 27 |
app = FastAPI(title="Invoice Splitter API")
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
app.add_middleware(
|
| 30 |
CORSMiddleware,
|
| 31 |
allow_origins=["*"],
|
|
@@ -42,7 +47,7 @@ GEMINI_MODELS = [
|
|
| 42 |
{
|
| 43 |
"name": "gemini-1.5-flash", # UPDATED: Current standard fast model
|
| 44 |
"max_requests_per_minute": 15,
|
| 45 |
-
"timeout":
|
| 46 |
"description": "Primary fast model"
|
| 47 |
},
|
| 48 |
{
|
|
@@ -90,7 +95,7 @@ class SimpleRateLimiter:
|
|
| 90 |
return max(0, self.window_seconds - (time.time() - oldest))
|
| 91 |
|
| 92 |
def reset(self):
|
| 93 |
-
self.requests.clear()
|
| 94 |
self.quota_error_count = 0
|
| 95 |
|
| 96 |
def record_quota_error(self):
|
|
@@ -108,12 +113,12 @@ def check_daily_quota():
|
|
| 108 |
global last_quota_reset, daily_quota_exhausted
|
| 109 |
now = datetime.datetime.now()
|
| 110 |
|
| 111 |
-
if last_quota_reset is None:
|
| 112 |
last_quota_reset = now
|
| 113 |
daily_quota_exhausted = False
|
| 114 |
return True
|
| 115 |
|
| 116 |
-
if now.date() > last_quota_reset.date():
|
| 117 |
print("🔄 Daily quota reset detected")
|
| 118 |
last_quota_reset = now
|
| 119 |
daily_quota_exhausted = False
|
|
@@ -142,8 +147,8 @@ def get_gemini_model():
|
|
| 142 |
try:
|
| 143 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 144 |
gemini_model = genai.GenerativeModel(model_config["name"])
|
| 145 |
-
print(f"✓ Initialized:
|
| 146 |
-
except Exception as e:
|
| 147 |
print(f"Failed to initialize {model_config['name']}: {e}")
|
| 148 |
return None
|
| 149 |
return gemini_model
|
|
@@ -159,7 +164,7 @@ def switch_to_next_model():
|
|
| 159 |
window_seconds=60
|
| 160 |
)
|
| 161 |
gemini_model = None
|
| 162 |
-
print(f"🔄 SWITCHED TO MODEL:
|
| 163 |
return get_gemini_model()
|
| 164 |
return None
|
| 165 |
|
|
@@ -180,11 +185,11 @@ def reset_to_primary_model():
|
|
| 180 |
|
| 181 |
# --- Regex Patterns ---
|
| 182 |
INVOICE_NO_RE = re.compile(
|
| 183 |
-
r"""(?:Invoice\s*No
|
| 184 |
re.IGNORECASE | re.VERBOSE
|
| 185 |
)
|
| 186 |
PREFIXED_INVOICE_RE = re.compile(r"\b([A-Z]{2,4}[-/]\d{4,}(?:/\d+)?[A-Z]*)\b")
|
| 187 |
-
GST_LIKE_RE = re.compile(r"\b((?:GSTIN|GST\s*No\.?|GST\s*IN|GST)[\s:\-]*([0-9A-Z]{15}))\b", re.IGNORECASE)
|
| 188 |
|
| 189 |
|
| 190 |
def is_image_based_pdf(doc: fitz.Document, sample_pages: int = 3) -> Tuple[bool, float]:
|
|
@@ -192,29 +197,31 @@ def is_image_based_pdf(doc: fitz.Document, sample_pages: int = 3) -> Tuple[bool,
|
|
| 192 |
pages_to_check = min(sample_pages, doc.page_count)
|
| 193 |
for i in range(pages_to_check):
|
| 194 |
text = doc.load_page(i).get_text("text") or ""
|
| 195 |
-
total_text_length += len(text.strip())
|
| 196 |
avg_text_length = total_text_length / pages_to_check
|
| 197 |
return avg_text_length < 50, avg_text_length
|
| 198 |
|
| 199 |
|
| 200 |
# --- Extraction Logic ---
|
| 201 |
def normalize_text_for_search(s: str) -> str:
|
| 202 |
-
if not s:
|
|
|
|
| 203 |
s = s.replace("\u00A0", " ")
|
| 204 |
return re.sub(r"[ ]{2,}", " ", re.sub(r"[\r\n\t]+", " ", s)).strip()
|
| 205 |
|
| 206 |
|
| 207 |
def try_extract_invoice_from_text(text: str) -> Optional[str]:
|
| 208 |
-
if not text:
|
|
|
|
| 209 |
text_norm = normalize_text_for_search(text)
|
| 210 |
|
| 211 |
-
m = INVOICE_NO_RE.search(text_norm)
|
| 212 |
if m:
|
| 213 |
inv = (m.group(1) or "").strip()
|
| 214 |
-
if inv and len(inv) > 2 and inv.lower() not in ("invoice", "bill"):
|
| 215 |
return inv
|
| 216 |
|
| 217 |
-
m = PREFIXED_INVOICE_RE.search(text_norm[:600])
|
| 218 |
if m:
|
| 219 |
inv = (m.group(1) or "").strip()
|
| 220 |
if inv and len(re.sub(r"[^A-Za-z0-9]", "", inv)) >= 5:
|
|
@@ -229,10 +236,12 @@ def try_extract_invoice_from_text(text: str) -> Optional[str]:
|
|
| 229 |
return None
|
| 230 |
|
| 231 |
|
| 232 |
-
def extract_invoice_gemini(page:
|
| 233 |
-
if not check_daily_quota():
|
|
|
|
| 234 |
model = get_gemini_model()
|
| 235 |
-
if not model:
|
|
|
|
| 236 |
|
| 237 |
if not gemini_rate_limiter.allow_request():
|
| 238 |
wait_time = gemini_rate_limiter.wait_time()
|
|
@@ -241,28 +250,40 @@ def extract_invoice_gemini(page: fitz.Page, retry_count=0) -> Optional[str]:
|
|
| 241 |
return extract_invoice_gemini(page, retry_count)
|
| 242 |
|
| 243 |
try:
|
| 244 |
-
|
|
|
|
| 245 |
img_bytes = pix.tobytes("png")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
img = Image.open(io.BytesIO(img_bytes))
|
| 247 |
|
| 248 |
-
prompt = """Extract the invoice number.
|
| 249 |
|
| 250 |
response = model.generate_content([prompt, img])
|
|
|
|
|
|
|
|
|
|
| 251 |
if response and response.text:
|
| 252 |
txt = response.text.strip().replace("*", "").replace("#", "")
|
| 253 |
if txt and txt != "NOT_FOUND" and len(txt) > 2:
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
# Fallback to OCR text
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
except Exception as e:
|
| 264 |
error_str = str(e).lower()
|
| 265 |
-
if "429" in str(e) or "quota" in error_str:
|
| 266 |
gemini_rate_limiter.record_quota_error()
|
| 267 |
if "per_day" in error_str:
|
| 268 |
mark_daily_quota_exhausted()
|
|
@@ -278,89 +299,174 @@ def extract_invoice_no_from_page(page: fitz.Page, is_image_pdf: bool) -> Optiona
|
|
| 278 |
# 1. Try Text Extraction (Fastest)
|
| 279 |
text = page.get_text("text") or ""
|
| 280 |
inv = try_extract_invoice_from_text(text)
|
| 281 |
-
if inv:
|
|
|
|
| 282 |
|
| 283 |
# 2. Try Block Extraction
|
| 284 |
for block in (page.get_text("blocks") or []):
|
| 285 |
-
if len(block) > 4 and block[4]:
|
| 286 |
inv = try_extract_invoice_from_text(block[4])
|
| 287 |
-
if inv:
|
|
|
|
| 288 |
|
| 289 |
# 3. Gemini Fallback (Only if enabled and seemingly image-based)
|
| 290 |
-
if is_image_pdf:
|
| 291 |
return extract_invoice_gemini(page)
|
| 292 |
|
| 293 |
return None
|
| 294 |
|
| 295 |
|
| 296 |
def build_pdf_from_pages(src_doc: fitz.Document, page_indices: List[int]) -> bytes:
|
|
|
|
| 297 |
out = fitz.open()
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
# --- File Cleanup Utility ---
|
| 304 |
def remove_file(path: str):
|
| 305 |
try:
|
| 306 |
-
os.
|
| 307 |
-
|
|
|
|
| 308 |
except Exception as e:
|
| 309 |
-
print(f"Warning: Could not remove temp file {path}: {e}")
|
| 310 |
|
| 311 |
|
| 312 |
# ============================================================================
|
| 313 |
# API ENDPOINTS
|
| 314 |
# ============================================================================
|
| 315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
@app.post("/split-invoices")
|
| 317 |
async def split_invoices(
|
| 318 |
background_tasks: BackgroundTasks,
|
| 319 |
file: UploadFile = File(...),
|
| 320 |
include_pdf: bool = Form(True),
|
|
|
|
| 321 |
):
|
| 322 |
-
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
try:
|
| 331 |
-
# Stream upload
|
| 332 |
-
print(f"📥 Receiving
|
|
|
|
|
|
|
| 333 |
with open(temp_path, "wb") as buffer:
|
| 334 |
-
#
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
buffer.write(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
-
|
|
|
|
| 339 |
|
| 340 |
-
# Open
|
| 341 |
doc = fitz.open(temp_path)
|
| 342 |
|
| 343 |
-
if doc.page_count == 0:
|
| 344 |
-
raise HTTPException(status_code=400, detail="
|
| 345 |
|
| 346 |
-
print(f"Processing {doc.page_count} pages...")
|
| 347 |
|
| 348 |
-
# Step 1: Detect
|
| 349 |
-
|
|
|
|
|
|
|
| 350 |
|
| 351 |
-
# Step 2:
|
| 352 |
page_invoice_nos = []
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
clean_invs = [
|
| 363 |
-
None if (v and v.upper().startswith("GST:")) else v
|
| 364 |
for v in page_invoice_nos
|
| 365 |
]
|
| 366 |
|
|
@@ -369,65 +475,260 @@ async def split_invoices(
|
|
| 369 |
current_inv = None
|
| 370 |
|
| 371 |
for idx, inv in enumerate(clean_invs):
|
| 372 |
-
if current_inv is None:
|
| 373 |
current_inv = inv
|
| 374 |
current_group = [idx]
|
| 375 |
else:
|
| 376 |
if inv is not None and inv != current_inv:
|
| 377 |
# Save previous group
|
| 378 |
-
groups.append({"invoice_no": current_inv, "pages":
|
| 379 |
# Start new group
|
| 380 |
current_inv = inv
|
| 381 |
current_group = [idx]
|
| 382 |
-
else:
|
| 383 |
current_group.append(idx)
|
| 384 |
|
| 385 |
if current_group:
|
| 386 |
-
groups.append({"invoice_no": current_inv, "pages": current_group})
|
| 387 |
|
| 388 |
-
#
|
| 389 |
if len(groups) > 1 and groups[0]["invoice_no"] is None and groups[1]["invoice_no"] is not None:
|
|
|
|
| 390 |
groups[1]["pages"] = groups[0]["pages"] + groups[1]["pages"]
|
| 391 |
-
groups.pop(0)
|
|
|
|
|
|
|
| 392 |
|
| 393 |
-
# Step 4: Build
|
| 394 |
parts = []
|
| 395 |
-
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
part_bytes = build_pdf_from_pages(doc, g["pages"])
|
|
|
|
| 398 |
info = {
|
| 399 |
"invoice_no": g["invoice_no"],
|
| 400 |
-
"pages": [p + 1 for p in g["pages"]],
|
| 401 |
-
"
|
|
|
|
|
|
|
| 402 |
}
|
| 403 |
-
|
| 404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
parts.append(info)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
-
doc.close()
|
| 408 |
-
|
| 409 |
return JSONResponse({
|
|
|
|
| 410 |
"count": len(parts),
|
| 411 |
"parts": parts,
|
| 412 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
})
|
| 414 |
|
| 415 |
-
except
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
| 417 |
import traceback
|
| 418 |
traceback.print_exc()
|
| 419 |
-
|
| 420 |
|
| 421 |
finally:
|
| 422 |
-
#
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
remove_file(temp_path)
|
|
|
|
|
|
|
|
|
|
| 427 |
|
| 428 |
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
import uvicorn
|
| 431 |
-
print("🚀 Starting High-Performance Invoice Splitter")
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import datetime
|
| 7 |
import shutil
|
| 8 |
import tempfile
|
| 9 |
+
import gc
|
| 10 |
from typing import List, Dict, Optional, Tuple
|
| 11 |
from collections import deque
|
| 12 |
from pathlib import Path
|
| 13 |
|
| 14 |
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks
|
| 15 |
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
| 17 |
+
from starlette.requests import Request
|
| 18 |
import fitz # PyMuPDF
|
| 19 |
|
| 20 |
# Google Gemini - optional import
|
|
|
|
| 24 |
GEMINI_AVAILABLE = True
|
| 25 |
except ImportError:
|
| 26 |
GEMINI_AVAILABLE = False
|
| 27 |
+
print("Warning: google-generativeai not installed. Image-based PDFs won't be supported.")
|
| 28 |
|
| 29 |
app = FastAPI(title="Invoice Splitter API")
|
| 30 |
|
| 31 |
+
# ⭐ Increase max request body size (default is 1MB-2MB)
|
| 32 |
+
Request.max_body_size = 200 * 1024 * 1024 # 200MB limit
|
| 33 |
+
|
| 34 |
app.add_middleware(
|
| 35 |
CORSMiddleware,
|
| 36 |
allow_origins=["*"],
|
|
|
|
| 47 |
{
|
| 48 |
"name": "gemini-1.5-flash", # UPDATED: Current standard fast model
|
| 49 |
"max_requests_per_minute": 15,
|
| 50 |
+
"timeout": 300,
|
| 51 |
"description": "Primary fast model"
|
| 52 |
},
|
| 53 |
{
|
|
|
|
| 95 |
return max(0, self.window_seconds - (time.time() - oldest))
|
| 96 |
|
| 97 |
def reset(self):
|
| 98 |
+
self.requests. clear()
|
| 99 |
self.quota_error_count = 0
|
| 100 |
|
| 101 |
def record_quota_error(self):
|
|
|
|
| 113 |
global last_quota_reset, daily_quota_exhausted
|
| 114 |
now = datetime.datetime.now()
|
| 115 |
|
| 116 |
+
if last_quota_reset is None:
|
| 117 |
last_quota_reset = now
|
| 118 |
daily_quota_exhausted = False
|
| 119 |
return True
|
| 120 |
|
| 121 |
+
if now. date() > last_quota_reset.date():
|
| 122 |
print("🔄 Daily quota reset detected")
|
| 123 |
last_quota_reset = now
|
| 124 |
daily_quota_exhausted = False
|
|
|
|
| 147 |
try:
|
| 148 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 149 |
gemini_model = genai.GenerativeModel(model_config["name"])
|
| 150 |
+
print(f"✓ Initialized: {model_config['name']}")
|
| 151 |
+
except Exception as e:
|
| 152 |
print(f"Failed to initialize {model_config['name']}: {e}")
|
| 153 |
return None
|
| 154 |
return gemini_model
|
|
|
|
| 164 |
window_seconds=60
|
| 165 |
)
|
| 166 |
gemini_model = None
|
| 167 |
+
print(f"🔄 SWITCHED TO MODEL: {model_config['name']}")
|
| 168 |
return get_gemini_model()
|
| 169 |
return None
|
| 170 |
|
|
|
|
| 185 |
|
| 186 |
# --- Regex Patterns ---
|
| 187 |
# Labelled invoice number: a label such as "Invoice No", "Inv. No", "Bill No",
# "Document No", "Doc No" or "Tax Invoice No", an optional ':' or '-', then the
# number itself (captured in group 1, at least 4 characters long).
INVOICE_NO_RE = re.compile(
    r"""(?:Invoice\s*No\.?|Inv\.?\s*No\.?|Bill\s*No\.?|Document\s*No\.?|Doc\s*No\.?|Tax\s*Invoice\s*No\.?)\s*[:\-]?\s*([A-Z0-9][A-Z0-9\-\/]{3,})""",
    re.IGNORECASE | re.VERBOSE
)
# Unlabelled number with a 2-4 letter upper-case prefix, e.g. "AB-12345" or
# "INV/2024/7". Case-sensitive on purpose: no IGNORECASE flag is passed.
PREFIXED_INVOICE_RE = re.compile(r"\b([A-Z]{2,4}[-/]\d{4,}(?:/\d+)?[A-Z]*)\b")
# GST registration: group 1 is label plus number, group 2 the 15-character number.
# This pattern is NOT compiled with re.VERBOSE, so any literal space inside the
# alternation would have to appear in the input; keep the alternatives tight.
GST_LIKE_RE = re.compile(r"\b((?:GSTIN|GST\s*No\.?|GST\s*IN|GST)[\s:\-]*([0-9A-Z]{15}))\b", re.IGNORECASE)
|
| 193 |
|
| 194 |
|
| 195 |
def is_image_based_pdf(doc: fitz.Document, sample_pages: int = 3) -> Tuple[bool, float]:
|
|
|
|
| 197 |
pages_to_check = min(sample_pages, doc.page_count)
|
| 198 |
for i in range(pages_to_check):
|
| 199 |
text = doc.load_page(i).get_text("text") or ""
|
| 200 |
+
total_text_length += len(text. strip())
|
| 201 |
avg_text_length = total_text_length / pages_to_check
|
| 202 |
return avg_text_length < 50, avg_text_length
|
| 203 |
|
| 204 |
|
| 205 |
# --- Extraction Logic ---
|
| 206 |
def normalize_text_for_search(s: str) -> str:
    """Flatten *s* to a single line with single spaces between tokens.

    Non-breaking spaces are treated as ordinary spaces, and every run of
    spaces, tabs, carriage returns and newlines collapses to one space.
    Leading and trailing whitespace is stripped.

    Args:
        s: Raw text, e.g. as extracted from a PDF page. May be empty or None.

    Returns:
        The normalized text; an empty string when *s* is empty or None, so
        the declared ``str`` return type always holds.
    """
    if not s:
        return ""
    # One pass is equivalent to "newlines/tabs -> space, then squeeze spaces":
    # any maximal run of these characters ends up as exactly one space.
    return re.sub(r"[ \r\n\t]+", " ", s.replace("\u00A0", " ")).strip()
|
| 211 |
|
| 212 |
|
| 213 |
def try_extract_invoice_from_text(text: str) -> Optional[str]:
|
| 214 |
+
if not text:
|
| 215 |
+
return None
|
| 216 |
text_norm = normalize_text_for_search(text)
|
| 217 |
|
| 218 |
+
m = INVOICE_NO_RE. search(text_norm)
|
| 219 |
if m:
|
| 220 |
inv = (m.group(1) or "").strip()
|
| 221 |
+
if inv and len(inv) > 2 and inv. lower() not in ("invoice", "bill"):
|
| 222 |
return inv
|
| 223 |
|
| 224 |
+
m = PREFIXED_INVOICE_RE.search(text_norm[: 600])
|
| 225 |
if m:
|
| 226 |
inv = (m.group(1) or "").strip()
|
| 227 |
if inv and len(re.sub(r"[^A-Za-z0-9]", "", inv)) >= 5:
|
|
|
|
| 236 |
return None
|
| 237 |
|
| 238 |
|
| 239 |
+
def extract_invoice_gemini(page: fitz.Page, retry_count=0) -> Optional[str]:
|
| 240 |
+
if not check_daily_quota():
|
| 241 |
+
return None
|
| 242 |
model = get_gemini_model()
|
| 243 |
+
if not model:
|
| 244 |
+
return None
|
| 245 |
|
| 246 |
if not gemini_rate_limiter.allow_request():
|
| 247 |
wait_time = gemini_rate_limiter.wait_time()
|
|
|
|
| 250 |
return extract_invoice_gemini(page, retry_count)
|
| 251 |
|
| 252 |
try:
|
| 253 |
+
# ⭐ Reduced resolution from 2x to 1.5x to save memory
|
| 254 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(1. 5, 1.5), dpi=150)
|
| 255 |
img_bytes = pix.tobytes("png")
|
| 256 |
+
|
| 257 |
+
# ⭐ Explicitly free pixmap memory
|
| 258 |
+
pix = None
|
| 259 |
+
|
| 260 |
img = Image.open(io.BytesIO(img_bytes))
|
| 261 |
|
| 262 |
+
prompt = """Extract the invoice number. Return ONLY the number. If not found, return 'NOT_FOUND'."""
|
| 263 |
|
| 264 |
response = model.generate_content([prompt, img])
|
| 265 |
+
|
| 266 |
+
# Try to get invoice number from response
|
| 267 |
+
result = None
|
| 268 |
if response and response.text:
|
| 269 |
txt = response.text.strip().replace("*", "").replace("#", "")
|
| 270 |
if txt and txt != "NOT_FOUND" and len(txt) > 2:
|
| 271 |
+
result = txt
|
| 272 |
+
|
| 273 |
+
# Fallback to OCR text if no result
|
| 274 |
+
if not result:
|
| 275 |
+
ocr_resp = model.generate_content(["Extract all text.", img])
|
| 276 |
+
if ocr_resp and ocr_resp.text:
|
| 277 |
+
result = try_extract_invoice_from_text(ocr_resp.text)
|
| 278 |
+
|
| 279 |
+
# ⭐ Free image memory
|
| 280 |
+
img. close()
|
| 281 |
+
|
| 282 |
+
return result
|
| 283 |
|
| 284 |
except Exception as e:
|
| 285 |
error_str = str(e).lower()
|
| 286 |
+
if "429" in str(e) or "quota" in error_str:
|
| 287 |
gemini_rate_limiter.record_quota_error()
|
| 288 |
if "per_day" in error_str:
|
| 289 |
mark_daily_quota_exhausted()
|
|
|
|
| 299 |
# 1. Try Text Extraction (Fastest)
|
| 300 |
text = page.get_text("text") or ""
|
| 301 |
inv = try_extract_invoice_from_text(text)
|
| 302 |
+
if inv:
|
| 303 |
+
return inv
|
| 304 |
|
| 305 |
# 2. Try Block Extraction
|
| 306 |
for block in (page.get_text("blocks") or []):
|
| 307 |
+
if len(block) > 4 and block[4]:
|
| 308 |
inv = try_extract_invoice_from_text(block[4])
|
| 309 |
+
if inv:
|
| 310 |
+
return inv
|
| 311 |
|
| 312 |
# 3. Gemini Fallback (Only if enabled and seemingly image-based)
|
| 313 |
+
if is_image_pdf:
|
| 314 |
return extract_invoice_gemini(page)
|
| 315 |
|
| 316 |
return None
|
| 317 |
|
| 318 |
|
| 319 |
def build_pdf_from_pages(src_doc: fitz.Document, page_indices: List[int]) -> bytes:
    """Build a new PDF containing the given pages of *src_doc*, in order.

    Consecutive ascending indices are copied with a single ``insert_pdf``
    range call instead of one call per page; non-adjacent or out-of-order
    indices start a new range, so the output page order always matches
    *page_indices*.

    Args:
        src_doc: Open source document to copy pages from.
        page_indices: 0-based page numbers to include.

    Returns:
        The serialized PDF, saved with ``garbage=4`` and ``deflate=True``.
    """
    out = fitz.open()
    try:
        run_start = run_end = None
        for i in page_indices:
            if run_end is not None and i == run_end + 1:
                # Still inside a contiguous run: just extend it.
                run_end = i
                continue
            if run_start is not None:
                out.insert_pdf(src_doc, from_page=run_start, to_page=run_end)
            run_start = run_end = i

        # Flush the last pending run (absent only when page_indices is empty).
        if run_start is not None:
            out.insert_pdf(src_doc, from_page=run_start, to_page=run_end)

        return out.tobytes(garbage=4, deflate=True)
    finally:
        # Always release the output document, even if copying or saving fails.
        out.close()
|
| 331 |
|
| 332 |
|
| 333 |
# --- File Cleanup Utility ---
|
| 334 |
def remove_file(path: str):
    """Delete a temporary file on a best-effort basis.

    Never raises: a file that is already gone is ignored silently, and any
    other failure is reported as a printed warning so cleanup cannot mask
    the error that triggered it.

    Args:
        path: Filesystem path of the file to delete.
    """
    try:
        # Attempt the removal directly instead of checking existence first;
        # that avoids the window between the check and the delete.
        os.remove(path)
    except FileNotFoundError:
        # Nothing to clean up.
        return
    except Exception as e:
        print(f"⚠️ Warning: Could not remove temp file {path}: {e}")
    else:
        print(f"🧹 Cleaned up temp file: {path}")
|
| 341 |
|
| 342 |
|
| 343 |
# ============================================================================
|
| 344 |
# API ENDPOINTS
|
| 345 |
# ============================================================================
|
| 346 |
|
| 347 |
+
@app.get("/")
async def root():
    """Return static service metadata plus the Gemini availability flags."""
    gemini_key_present = bool(GEMINI_API_KEY)
    service_info = {}
    service_info["service"] = "Invoice Splitter API"
    service_info["version"] = "2.0"
    service_info["max_file_size_mb"] = 200
    # Whether the optional Gemini client library was importable.
    service_info["gemini_available"] = GEMINI_AVAILABLE
    # Whether an API key value is set.
    service_info["gemini_configured"] = gemini_key_present
    return service_info
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
@app.get("/health")
async def health():
    """Report liveness together with a snapshot of the Gemini state."""
    active_model = GEMINI_MODELS[current_model_index]
    gemini_snapshot = {
        "available": GEMINI_AVAILABLE,
        "configured": bool(GEMINI_API_KEY),
        "current_model": active_model["name"],
        "daily_quota_exhausted": daily_quota_exhausted,
    }
    return {"status": "healthy", "gemini_status": gemini_snapshot}
|
| 369 |
+
|
| 370 |
+
|
| 371 |
@app.post("/split-invoices")
|
| 372 |
async def split_invoices(
|
| 373 |
background_tasks: BackgroundTasks,
|
| 374 |
file: UploadFile = File(...),
|
| 375 |
include_pdf: bool = Form(True),
|
| 376 |
+
max_file_size_mb: int = Form(200)
|
| 377 |
):
|
| 378 |
+
"""
|
| 379 |
+
Split a large PDF file into separate invoices.
|
| 380 |
+
|
| 381 |
+
Parameters:
|
| 382 |
+
- file: PDF file to split (max 200MB)
|
| 383 |
+
- include_pdf: Include base64-encoded PDFs in response (default: True)
|
| 384 |
+
- max_file_size_mb: Maximum file size in MB (default: 200)
|
| 385 |
+
|
| 386 |
+
Returns:
|
| 387 |
+
- JSON with split invoice parts
|
| 388 |
+
"""
|
| 389 |
+
if not file.filename.lower().endswith(". pdf"):
|
| 390 |
+
raise HTTPException(status_code=400, detail="Only PDF files are supported")
|
| 391 |
|
| 392 |
+
max_size_bytes = max_file_size_mb * 1024 * 1024
|
| 393 |
+
|
| 394 |
+
# Create temporary file
|
| 395 |
+
fd, temp_path = tempfile. mkstemp(suffix=".pdf")
|
| 396 |
+
os.close(fd)
|
| 397 |
+
|
| 398 |
+
doc = None # Initialize for finally block
|
| 399 |
|
| 400 |
try:
|
| 401 |
+
# ⭐ Stream upload with size tracking and validation
|
| 402 |
+
print(f"📥 Receiving file: {file.filename}")
|
| 403 |
+
total_size = 0
|
| 404 |
+
|
| 405 |
with open(temp_path, "wb") as buffer:
|
| 406 |
+
# ⭐ Use 5MB chunks for faster processing
|
| 407 |
+
chunk_size = 5 * 1024 * 1024
|
| 408 |
+
|
| 409 |
+
while content := await file.read(chunk_size):
|
| 410 |
+
total_size += len(content)
|
| 411 |
+
|
| 412 |
+
# ⭐ Check size limit during upload
|
| 413 |
+
if total_size > max_size_bytes:
|
| 414 |
+
raise HTTPException(
|
| 415 |
+
status_code=413,
|
| 416 |
+
detail=f"File too large. Maximum size: {max_file_size_mb}MB, received: {total_size / (1024*1024):.1f}MB"
|
| 417 |
+
)
|
| 418 |
+
|
| 419 |
buffer.write(content)
|
| 420 |
+
|
| 421 |
+
# ⭐ Progress logging for large files
|
| 422 |
+
if total_size % (20 * 1024 * 1024) < chunk_size: # Every ~20MB
|
| 423 |
+
print(f" 📊 Uploaded: {total_size / (1024*1024):.1f}MB")
|
| 424 |
|
| 425 |
+
file_size_mb = total_size / (1024 * 1024)
|
| 426 |
+
print(f"💾 Saved {file_size_mb:.2f}MB to: {temp_path}")
|
| 427 |
|
| 428 |
+
# ⭐ Open PDF from disk (memory-mapped)
|
| 429 |
doc = fitz.open(temp_path)
|
| 430 |
|
| 431 |
+
if doc. page_count == 0:
|
| 432 |
+
raise HTTPException(status_code=400, detail="PDF file is empty")
|
| 433 |
|
| 434 |
+
print(f"📄 Processing {doc.page_count} pages...")
|
| 435 |
|
| 436 |
+
# Step 1: Detect if image-based PDF (check fewer pages for large PDFs)
|
| 437 |
+
sample_pages = min(3, doc.page_count)
|
| 438 |
+
is_image_pdf, avg_text = is_image_based_pdf(doc, sample_pages)
|
| 439 |
+
print(f" PDF Type: {'Image-based' if is_image_pdf else 'Text-based'} (avg text: {avg_text:.1f} chars)")
|
| 440 |
|
| 441 |
+
# Step 2: Extract invoice numbers from all pages
|
| 442 |
page_invoice_nos = []
|
| 443 |
+
|
| 444 |
+
for i in range(doc. page_count):
|
| 445 |
+
# ⭐ Progress logging for large documents
|
| 446 |
+
if i > 0 and i % 50 == 0:
|
| 447 |
+
print(f" 📄 Processed {i}/{doc.page_count} pages")
|
| 448 |
+
|
| 449 |
+
page = doc. load_page(i)
|
| 450 |
+
|
| 451 |
+
try:
|
| 452 |
+
inv = extract_invoice_no_from_page(page, is_image_pdf)
|
| 453 |
+
page_invoice_nos.append(inv)
|
| 454 |
+
|
| 455 |
+
if inv:
|
| 456 |
+
print(f" Page {i+1}: Found invoice '{inv}'")
|
| 457 |
+
finally:
|
| 458 |
+
# ⭐ Explicitly free page resources
|
| 459 |
+
page = None
|
| 460 |
+
|
| 461 |
+
# ⭐ Force garbage collection every 100 pages
|
| 462 |
+
if i > 0 and i % 100 == 0:
|
| 463 |
+
gc.collect()
|
| 464 |
+
|
| 465 |
+
print(f"✓ Extraction complete. Found {sum(1 for x in page_invoice_nos if x)} invoice numbers")
|
| 466 |
+
|
| 467 |
+
# Step 3: Filter GST-only entries and group pages
|
| 468 |
clean_invs = [
|
| 469 |
+
None if (v and v.upper().startswith("GST: ")) else v
|
| 470 |
for v in page_invoice_nos
|
| 471 |
]
|
| 472 |
|
|
|
|
| 475 |
current_inv = None
|
| 476 |
|
| 477 |
for idx, inv in enumerate(clean_invs):
|
| 478 |
+
if current_inv is None:
|
| 479 |
current_inv = inv
|
| 480 |
current_group = [idx]
|
| 481 |
else:
|
| 482 |
if inv is not None and inv != current_inv:
|
| 483 |
# Save previous group
|
| 484 |
+
groups.append({"invoice_no": current_inv, "pages": current_group})
|
| 485 |
# Start new group
|
| 486 |
current_inv = inv
|
| 487 |
current_group = [idx]
|
| 488 |
+
else:
|
| 489 |
current_group.append(idx)
|
| 490 |
|
| 491 |
if current_group:
|
| 492 |
+
groups. append({"invoice_no": current_inv, "pages": current_group})
|
| 493 |
|
| 494 |
+
# ⭐ Smart merging: If first page has no invoice, merge with second group
|
| 495 |
if len(groups) > 1 and groups[0]["invoice_no"] is None and groups[1]["invoice_no"] is not None:
|
| 496 |
+
print(f" 🔗 Merging first {len(groups[0]['pages'])} pages with invoice '{groups[1]['invoice_no']}'")
|
| 497 |
groups[1]["pages"] = groups[0]["pages"] + groups[1]["pages"]
|
| 498 |
+
groups. pop(0)
|
| 499 |
+
|
| 500 |
+
print(f"📦 Created {len(groups)} invoice groups")
|
| 501 |
|
| 502 |
+
# Step 4: Build response with PDFs
|
| 503 |
parts = []
|
| 504 |
+
total_response_size = 0
|
| 505 |
+
max_response_size = 100 * 1024 * 1024 # 100MB response limit
|
| 506 |
+
|
| 507 |
+
for idx, g in enumerate(groups):
|
| 508 |
+
print(f" 🔨 Building PDF part {idx+1}/{len(groups)} (Invoice: {g['invoice_no'] or 'Unknown'})")
|
| 509 |
+
|
| 510 |
part_bytes = build_pdf_from_pages(doc, g["pages"])
|
| 511 |
+
|
| 512 |
info = {
|
| 513 |
"invoice_no": g["invoice_no"],
|
| 514 |
+
"pages": [p + 1 for p in g["pages"]], # 1-based page numbers
|
| 515 |
+
"page_count": len(g["pages"]),
|
| 516 |
+
"size_bytes": len(part_bytes),
|
| 517 |
+
"size_mb": round(len(part_bytes) / (1024 * 1024), 2)
|
| 518 |
}
|
| 519 |
+
|
| 520 |
+
# ⭐ Handle large responses - skip base64 if total response too large
|
| 521 |
+
if include_pdf:
|
| 522 |
+
base64_size = len(part_bytes) * 4 / 3 # Base64 encoding overhead
|
| 523 |
+
total_response_size += base64_size
|
| 524 |
+
|
| 525 |
+
if total_response_size > max_response_size:
|
| 526 |
+
print(f" ⚠️ Response size exceeds 100MB. Skipping base64 for remaining parts.")
|
| 527 |
+
info["pdf_base64"] = None
|
| 528 |
+
info["warning"] = "PDF too large for inline response. Use streaming endpoint or set include_pdf=false"
|
| 529 |
+
else:
|
| 530 |
+
info["pdf_base64"] = base64.b64encode(part_bytes).decode("ascii")
|
| 531 |
+
else:
|
| 532 |
+
info["pdf_base64"] = None
|
| 533 |
+
|
| 534 |
parts.append(info)
|
| 535 |
+
|
| 536 |
+
# ⭐ Free memory immediately
|
| 537 |
+
del part_bytes
|
| 538 |
+
|
| 539 |
+
# ⭐ Garbage collect after each part
|
| 540 |
+
if idx % 5 == 0:
|
| 541 |
+
gc.collect()
|
| 542 |
+
|
| 543 |
+
print(f"✅ Successfully split into {len(parts)} parts")
|
| 544 |
|
|
|
|
|
|
|
| 545 |
return JSONResponse({
|
| 546 |
+
"success": True,
|
| 547 |
"count": len(parts),
|
| 548 |
"parts": parts,
|
| 549 |
+
"source_file": {
|
| 550 |
+
"name": file.filename,
|
| 551 |
+
"size_mb": round(file_size_mb, 2),
|
| 552 |
+
"total_pages": doc.page_count,
|
| 553 |
+
"is_image_pdf": is_image_pdf
|
| 554 |
+
},
|
| 555 |
+
"quota_status": {
|
| 556 |
+
"daily_exhausted": daily_quota_exhausted,
|
| 557 |
+
"current_model": GEMINI_MODELS[current_model_index]["name"]
|
| 558 |
+
}
|
| 559 |
})
|
| 560 |
|
| 561 |
+
except HTTPException:
|
| 562 |
+
raise # Re-raise HTTP exceptions as-is
|
| 563 |
+
|
| 564 |
+
except Exception as e:
|
| 565 |
+
print(f"❌ Critical Error: {e}")
|
| 566 |
import traceback
|
| 567 |
traceback.print_exc()
|
| 568 |
+
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
|
| 569 |
|
| 570 |
finally:
|
| 571 |
+
# ⭐ Critical cleanup in correct order
|
| 572 |
+
if doc:
|
| 573 |
+
try:
|
| 574 |
+
doc.close()
|
| 575 |
+
print("📕 Closed PDF document")
|
| 576 |
+
except Exception as e:
|
| 577 |
+
print(f"⚠️ Error closing document: {e}")
|
| 578 |
+
|
| 579 |
+
# Delete temp file
|
| 580 |
remove_file(temp_path)
|
| 581 |
+
|
| 582 |
+
# ⭐ Final garbage collection
|
| 583 |
+
gc.collect()
|
| 584 |
|
| 585 |
|
| 586 |
+
def _group_pages_by_invoice(page_invoice_nos: List[Optional[str]]) -> List[Dict]:
    """Partition page indices into consecutive runs that share an invoice number.

    A page with no number (None, or a value starting with "GST:") continues
    the current run. A page with a number different from the current run's
    starts a new run. Leading pages without a number are kept and, when the
    next run has a number, merged into it.

    Returns:
        A list of ``{"invoice_no": str | None, "pages": [0-based indices]}``.
    """
    groups: List[Dict] = []
    current_group: List[int] = []
    current_inv = None

    for idx, raw in enumerate(page_invoice_nos):
        # GST-only hits are not invoice numbers; treat them as "nothing found".
        inv = None if (raw and raw.upper().startswith("GST:")) else raw

        if not current_group:
            # Very first page. Testing the group (not current_inv) matters:
            # leading pages without a number must accumulate, not be replaced.
            current_inv = inv
            current_group = [idx]
        elif inv is not None and inv != current_inv:
            groups.append({"invoice_no": current_inv, "pages": current_group})
            current_inv = inv
            current_group = [idx]
        else:
            current_group.append(idx)

    if current_group:
        groups.append({"invoice_no": current_inv, "pages": current_group})

    # Leading pages with no number are attached to the first real invoice.
    if len(groups) > 1 and groups[0]["invoice_no"] is None and groups[1]["invoice_no"] is not None:
        groups[1]["pages"] = groups[0]["pages"] + groups[1]["pages"]
        groups.pop(0)

    return groups


@app.post("/split-invoices-stream")
async def split_invoices_stream(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    max_file_size_mb: int = Form(200)
):
    """
    Streaming version for extremely large files.
    Returns NDJSON (newline-delimited JSON) with each part as a separate line.

    This avoids building a large JSON response in memory.

    Parameters:
    - file: PDF file to split
    - max_file_size_mb: Maximum accepted upload size in MB (default: 200)
      NOTE(review): this limit comes from the request form and is not capped
      server-side here - confirm that is intended.

    Emitted lines, in order:
    - {"type": "status", ...} once the PDF has been opened
    - {"type": "part", ...} per invoice group, with the PDF base64-encoded
    - {"type": "complete", "total_parts": N}
    - {"type": "error", "error": msg} instead, if processing fails; the
      generator reports failures in-band rather than raising.

    Raises:
    - HTTPException 400 if the upload has no ".pdf" filename
    - HTTPException 413 if the upload exceeds the size limit
    """
    import json

    # The filename of an upload may be missing; treat that as "not a PDF"
    # instead of failing on None.lower().
    upload_name = file.filename or ""
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    max_size_bytes = max_file_size_mb * 1024 * 1024
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)

    # Upload file
    try:
        total_size = 0
        chunk_size = 5 * 1024 * 1024
        with open(temp_path, "wb") as buffer:
            while content := await file.read(chunk_size):
                total_size += len(content)
                if total_size > max_size_bytes:
                    raise HTTPException(status_code=413, detail=f"File too large. Max: {max_file_size_mb}MB")
                buffer.write(content)
    except Exception:
        # Single cleanup point, reached after the `with` has closed the file.
        remove_file(temp_path)
        raise

    # NOTE(review): the PDF work below is synchronous code inside an async
    # generator - confirm that is acceptable for concurrent requests.
    async def generate_parts():
        doc = None
        try:
            doc = fitz.open(temp_path)

            # Send initial status
            yield json.dumps({
                "type": "status",
                "status": "processing",
                "total_pages": doc.page_count,
                "filename": file.filename
            }) + "\n"

            # Detect PDF type
            is_image_pdf, _ = is_image_based_pdf(doc)

            # Extract invoice numbers
            page_invoice_nos = []
            for i in range(doc.page_count):
                page = doc.load_page(i)
                page_invoice_nos.append(extract_invoice_no_from_page(page, is_image_pdf))
                page = None  # drop the page reference before loading the next one

                if i % 100 == 0:
                    gc.collect()

            # Group pages
            groups = _group_pages_by_invoice(page_invoice_nos)

            # Stream each part
            for idx, g in enumerate(groups):
                part_bytes = build_pdf_from_pages(doc, g["pages"])

                info = {
                    "type": "part",
                    "part_index": idx,
                    "invoice_no": g["invoice_no"],
                    "pages": [p + 1 for p in g["pages"]],  # 1-based page numbers
                    "page_count": len(g["pages"]),
                    "size_bytes": len(part_bytes),
                    "pdf_base64": base64.b64encode(part_bytes).decode("ascii")
                }

                yield json.dumps(info) + "\n"
                del part_bytes
                gc.collect()

            # Send completion status
            yield json.dumps({
                "type": "complete",
                "total_parts": len(groups)
            }) + "\n"

        except Exception as e:
            yield json.dumps({
                "type": "error",
                "error": str(e)
            }) + "\n"
        finally:
            # Compare against None explicitly rather than relying on the
            # document object's truthiness.
            if doc is not None:
                doc.close()
            remove_file(temp_path)
            gc.collect()

    return StreamingResponse(
        generate_parts(),
        media_type="application/x-ndjson",
        headers={
            "Content-Disposition": "attachment; filename=invoices-split.ndjson"
        }
    )
|
| 716 |
+
|
| 717 |
+
|
| 718 |
+
if __name__ == "__main__":
    import uvicorn

    # Startup banner: one line per fact about the runtime configuration.
    startup_banner = (
        "🚀 Starting High-Performance Invoice Splitter API",
        f" Max file size: 200MB",
        f" Gemini available: {GEMINI_AVAILABLE}",
        f" Gemini configured: {bool(GEMINI_API_KEY)}",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Server settings tuned for large uploads.
    # NOTE(review): limit_max_requests looks like it stops the server process
    # after that many requests - confirm something restarts it.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "workers": 1,  # single worker to maintain rate limiter state
        "timeout_keep_alive": 300,  # 5 minutes for large uploads
        "limit_concurrency": 10,
        "limit_max_requests": 1000,
    }
    uvicorn.run(app, **server_options)
|