Spaces:

satvaSolutions
/

Transaction_Reconciliation

Sleeping

App Files Community

RajanMalaviya committed on May 9, 2025

Commit

df4f589

verified ·

1 Parent(s): e86531f

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -55

app.py CHANGED Viewed

@@ -33,20 +33,9 @@ if not hf_token:
     logger.error("HF_TOKEN environment variable not set")
     raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
-# Initialize Hugging Face Inference Client with primary and fallback models
-primary_model = "mistral/Mixtral-8x7B-Instruct-v0.1"
-fallback_model = "Qwen/Qwen2-7B-Instruct"
-try:
-    client = InferenceClient(model=primary_model, token=hf_token, provider="auto")
-    logger.info(f"Hugging Face Inference Client initialized for {primary_model} with provider='auto'")
-except Exception as e:
-    logger.warning(f"Failed to initialize client for {primary_model}: {str(e)}. Falling back to {fallback_model}")
-    try:
-        client = InferenceClient(model=fallback_model, token=hf_token, provider="hf-inference")
-        logger.info(f"Hugging Face Inference Client initialized for {fallback_model} with provider='hf-inference'")
-    except Exception as e:
-        logger.error(f"Failed to initialize client for {fallback_model}: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to initialize Inference Client: {str(e)}")
 # In-memory caches (1-hour TTL)
 raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
@@ -71,7 +60,7 @@ async def process_image(img_bytes, filename, idx):
     start_time = time.time()
     logger.info(f"Starting OCR for {filename} image {idx}, {log_memory_usage()}")
     try:
-        img = Image.open(io.BytesIO(img_bytes)).resize((800, 600))  # Resize for faster OCR
         img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
         img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
@@ -88,7 +77,7 @@ async def process_pdf_page(img, page_idx):
     start_time = time.time()
     logger.info(f"Starting OCR for PDF page {page_idx}, {log_memory_usage()}")
     try:
-        img = img.resize((800, 600))  # Resize for faster OCR
         img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
         img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
@@ -100,7 +89,7 @@ async def process_pdf_page(img, page_idx):
         logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
         return ""
-async def process_with_qwen(filename: str, raw_text: str):
     """Process raw text with LLM via Hugging Face Inference API."""
     start_time = time.time()
     logger.info(f"Starting LLM API processing for {filename}, {log_memory_usage()}")
@@ -116,43 +105,69 @@ async def process_with_qwen(filename: str, raw_text: str):
         raw_text = raw_text[:2000]
         logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
-    try:
-        prompt = f"""
-        Extract key invoice fields as JSON from the raw text. Support English. Detect currency (e.g., USD, INR). Output only valid JSON, with no additional text, comments, or markdown.
-        Raw text: {raw_text}
-        Output JSON:
-        {{
-            "currency": "",
-            "Name_Client": "",
-            "Products": [],
-            "Subtotal": "",
-            "Tax": "",
-            "total": "",
-            "invoice date": "",
-            "invoice number": ""
-        }}
-        """
-        # Call Hugging Face Inference API
-        response = await asyncio.to_thread(client.chat_completion,
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=256,
-            temperature=0.7
-        )
-        llm_output = response.choices[0].message.content
-        # Extract JSON from output
-        json_start = llm_output.find("{")
-        json_end = llm_output.rfind("}") + 1
-        if json_start == -1 or json_end == -1:
-            raise ValueError("No valid JSON found in API output")
-        json_str = llm_output[json_start:json_end]
-        structured_data = json.loads(json_str)
-        structured_data_cache[text_hash] = structured_data
-        logger.info(f"LLM API processing for {filename}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
-        return structured_data
-    except Exception as e:
-        logger.error(f"LLM API processing failed for {filename}: {str(e)}, {log_memory_usage()}")
-        return {"error": f"LLM API processing failed: {str(e)}"}
 @app.post("/ocr")
 async def extract_and_structure(files: List[UploadFile] = File(...)):
@@ -265,10 +280,10 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
                 raw_text_cache[file_hash] = raw_text
                 logger.info(f"Text normalization for {file.filename}, took {time.time() - normalize_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
             except Exception as e:
-                logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")
         # Process with LLM API
-        structured_data = await process_with_qwen(file.filename, raw_text)
         success_count += 1
         output_json["data"].append({
             "filename": file.filename,

     logger.error("HF_TOKEN environment variable not set")
     raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
+# Initialize Hugging Face Inference Client
+client = InferenceClient(token=hf_token)
+logger.info("Hugging Face Inference Client initialized")
 # In-memory caches (1-hour TTL)
 raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
     start_time = time.time()
     logger.info(f"Starting OCR for {filename} image {idx}, {log_memory_usage()}")
     try:
+        img = Image.open(io.BytesIO(img_bytes)).resize((600, 400))  # Smaller for speed
         img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
         img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
     start_time = time.time()
     logger.info(f"Starting OCR for PDF page {page_idx}, {log_memory_usage()}")
     try:
+        img = img.resize((600, 400))  # Smaller for speed
         img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
         img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
         logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
         return ""
+async def process_with_llm(filename: str, raw_text: str):
     """Process raw text with LLM via Hugging Face Inference API."""
     start_time = time.time()
     logger.info(f"Starting LLM API processing for {filename}, {log_memory_usage()}")
         raw_text = raw_text[:2000]
         logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
+    # Define models to try with retry logic
+    models = [
+        {"model": "google/gemma-2-9b-it", "provider": "auto"},
+        {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "provider": "auto"}
+    ]
+    for model_info in models:
+        model = model_info["model"]
+        provider = model_info["provider"]
+        logger.info(f"Attempting LLM API call with model {model} and provider {provider}")
+        for attempt in range(2):  # Retry once
+            try:
+                prompt = f"""
+                Extract key invoice fields as JSON from the raw text. Support English. Detect currency (e.g., USD, INR). Output only valid JSON, with no additional text, comments, or markdown.
+                Raw text: {raw_text}
+                Output JSON:
+                {{
+                    "currency": "",
+                    "Name_Client": "",
+                    "Products": [],
+                    "Subtotal": "",
+                    "Tax": "",
+                    "total": "",
+                    "invoice date": "",
+                    "invoice number": ""
+                }}
+                """
+                # Call Hugging Face Inference API
+                response = await asyncio.to_thread(client.chat_completion,
+                    model=model,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=256,
+                    temperature=0.7,
+                    provider=provider
+                )
+                llm_output = response.choices[0].message.content
+                # Extract JSON from output
+                llm_output = llm_output.strip()
+                if not llm_output.startswith("{"):
+                    raise ValueError("API output is not valid JSON")
+                json_start = llm_output.find("{")
+                json_end = llm_output.rfind("}") + 1
+                json_str = llm_output[json_start:json_end]
+                try:
+                    structured_data = json.loads(json_str)
+                except json.JSONDecodeError:
+                    logger.warning(f"JSON parsing failed for {filename}, attempting to fix")
+                    json_str = llm_output[llm_output.find("{"):llm_output.rfind("}")+1]
+                    structured_data = json.loads(json_str)
+                structured_data_cache[text_hash] = structured_data
+                logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
+                return structured_data
+            except Exception as e:
+                logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
+                if attempt == 1:  # No more retries
+                    break
+                await asyncio.sleep(1)  # Wait before retry
+    # If all models fail
+    error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."
+    logger.error(f"{error_msg} for {filename}, {log_memory_usage()}")
+    return {"error": error_msg}
 @app.post("/ocr")
 async def extract_and_structure(files: List[UploadFile] = File(...)):
                 raw_text_cache[file_hash] = raw_text
                 logger.info(f"Text normalization for {file.filename}, took {time.time() - normalize_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
             except Exception as e:
+                logger.warning(f"Text normalization failed for {filename}: {str(e)}, {log_memory_usage()}")
         # Process with LLM API
+        structured_data = await process_with_llm(file.filename, raw_text)
         success_count += 1
         output_json["data"].append({
             "filename": file.filename,