RajanMalaviya committed on
Commit
ed3090c
·
verified ·
1 Parent(s): df4f589

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -16,7 +16,7 @@ import asyncio
16
  import psutil
17
  import cachetools
18
  import hashlib
19
- from huggingface_hub import InferenceClient
20
 
21
  app = FastAPI()
22
 
@@ -105,10 +105,10 @@ async def process_with_llm(filename: str, raw_text: str):
105
  raw_text = raw_text[:2000]
106
  logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
107
 
108
- # Define models to try with retry logic
109
  models = [
110
- {"model": "google/gemma-2-9b-it", "provider": "auto"},
111
- {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "provider": "auto"}
112
  ]
113
 
114
  for model_info in models:
@@ -158,11 +158,18 @@ async def process_with_llm(filename: str, raw_text: str):
158
  structured_data_cache[text_hash] = structured_data
159
  logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
160
  return structured_data
 
 
 
 
 
 
 
 
 
161
  except Exception as e:
162
  logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
163
- if attempt == 1: # No more retries
164
- break
165
- await asyncio.sleep(1) # Wait before retry
166
 
167
  # If all models fail
168
  error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."
 
16
  import psutil
17
  import cachetools
18
  import hashlib
19
+ from huggingface_hub import InferenceClient, HTTPError
20
 
21
  app = FastAPI()
22
 
 
105
  raw_text = raw_text[:2000]
106
  logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")
107
 
108
+ # Define models to try
109
  models = [
110
+ {"model": "google/gemma-2-9b-it", "provider": "hyperbolic"},
111
+ {"model": "mistral/Mixtral-8x22B-Instruct-v0.1", "provider": "auto"}
112
  ]
113
 
114
  for model_info in models:
 
158
  structured_data_cache[text_hash] = structured_data
159
  logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
160
  return structured_data
161
+ except HTTPError as e:
162
+ if e.status_code == 429: # Rate limit
163
+ logger.warning(f"Rate limit hit for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
164
+ if attempt == 1:
165
+ break
166
+ await asyncio.sleep(2 ** attempt) # Exponential backoff
167
+ else:
168
+ logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
169
+ break
170
  except Exception as e:
171
  logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
172
+ break
 
 
173
 
174
  # If all models fail
175
  error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."