Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ import asyncio
|
|
| 16 |
import psutil
|
| 17 |
import cachetools
|
| 18 |
import hashlib
|
| 19 |
-
from huggingface_hub import InferenceClient
|
| 20 |
|
| 21 |
app = FastAPI()
|
| 22 |
|
|
@@ -105,10 +105,10 @@ async def process_with_llm(filename: str, raw_text: str):
|
|
| 105 |
raw_text = raw_text[:2000]
|
| 106 |
logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
|
| 107 |
|
| 108 |
-
# Define models to try
|
| 109 |
models = [
|
| 110 |
-
{"model": "google/gemma-2-9b-it", "provider": "
|
| 111 |
-
{"model": "
|
| 112 |
]
|
| 113 |
|
| 114 |
for model_info in models:
|
|
@@ -158,11 +158,18 @@ async def process_with_llm(filename: str, raw_text: str):
|
|
| 158 |
structured_data_cache[text_hash] = structured_data
|
| 159 |
logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
|
| 160 |
return structured_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
except Exception as e:
|
| 162 |
logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
|
| 163 |
-
|
| 164 |
-
break
|
| 165 |
-
await asyncio.sleep(1) # Wait before retry
|
| 166 |
|
| 167 |
# If all models fail
|
| 168 |
error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."
|
|
|
|
| 16 |
import psutil
|
| 17 |
import cachetools
|
| 18 |
import hashlib
|
| 19 |
+
from huggingface_hub import InferenceClient, HTTPError
|
| 20 |
|
| 21 |
app = FastAPI()
|
| 22 |
|
|
|
|
| 105 |
raw_text = raw_text[:2000]
|
| 106 |
logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
|
| 107 |
|
| 108 |
+
# Define models to try
|
| 109 |
models = [
|
| 110 |
+
{"model": "google/gemma-2-9b-it", "provider": "hyperbolic"},
|
| 111 |
+
{"model": "mistral/Mixtral-8x22B-Instruct-v0.1", "provider": "auto"}
|
| 112 |
]
|
| 113 |
|
| 114 |
for model_info in models:
|
|
|
|
| 158 |
structured_data_cache[text_hash] = structured_data
|
| 159 |
logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
|
| 160 |
return structured_data
|
| 161 |
+
except HTTPError as e:
|
| 162 |
+
if e.status_code == 429: # Rate limit
|
| 163 |
+
logger.warning(f"Rate limit hit for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
|
| 164 |
+
if attempt == 1:
|
| 165 |
+
break
|
| 166 |
+
await asyncio.sleep(2 ** attempt) # Exponential backoff
|
| 167 |
+
else:
|
| 168 |
+
logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
|
| 169 |
+
break
|
| 170 |
except Exception as e:
|
| 171 |
logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
|
| 172 |
+
break
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# If all models fail
|
| 175 |
error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."
|