Spaces:

satvaSolutions
/

Transaction_Reconciliation

Sleeping

App Files Community

RajanMalaviya committed on May 9, 2025

Commit

ed3090c

verified ·

1 Parent(s): df4f589

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ import asyncio
 import psutil
 import cachetools
 import hashlib
-from huggingface_hub import InferenceClient
 app = FastAPI()
@@ -105,10 +105,10 @@ async def process_with_llm(filename: str, raw_text: str):
         raw_text = raw_text[:2000]
         logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
-    # Define models to try with retry logic
     models = [
-        {"model": "google/gemma-2-9b-it", "provider": "auto"},
-        {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "provider": "auto"}
     ]
     for model_info in models:
@@ -158,11 +158,18 @@ async def process_with_llm(filename: str, raw_text: str):
                 structured_data_cache[text_hash] = structured_data
                 logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
                 return structured_data
             except Exception as e:
                 logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
-                if attempt == 1:  # No more retries
-                    break
-                await asyncio.sleep(1)  # Wait before retry
     # If all models fail
     error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."

 import psutil
 import cachetools
 import hashlib
+from huggingface_hub import InferenceClient, HTTPError
 app = FastAPI()
         raw_text = raw_text[:2000]
         logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
+    # Define models to try
     models = [
+        {"model": "google/gemma-2-9b-it", "provider": "hyperbolic"},
+        {"model": "mistral/Mixtral-8x22B-Instruct-v0.1", "provider": "auto"}
     ]
     for model_info in models:
                 structured_data_cache[text_hash] = structured_data
                 logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
                 return structured_data
+            except HTTPError as e:
+                if e.status_code == 429:  # Rate limit
+                    logger.warning(f"Rate limit hit for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
+                    if attempt == 1:
+                        break
+                    await asyncio.sleep(2 ** attempt)  # Exponential backoff
+                else:
+                    logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
+                    break
             except Exception as e:
                 logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
+                break
     # If all models fail
     error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."