RajanMalaviya committed on
Commit
e86531f
·
verified ·
1 Parent(s): 07d2947

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -34,15 +34,19 @@ if not hf_token:
34
  raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
35
 
36
  # Initialize Hugging Face Inference Client with primary and fallback models
37
- primary_model = "Qwen/Qwen2-7B-Instruct"
38
- fallback_model = "mistral/Mixtral-8x7B-Instruct-v0.1"
39
  try:
40
  client = InferenceClient(model=primary_model, token=hf_token, provider="auto")
41
  logger.info(f"Hugging Face Inference Client initialized for {primary_model} with provider='auto'")
42
  except Exception as e:
43
  logger.warning(f"Failed to initialize client for {primary_model}: {str(e)}. Falling back to {fallback_model}")
44
- client = InferenceClient(model=fallback_model, token=hf_token, provider="auto")
45
- logger.info(f"Hugging Face Inference Client initialized for {fallback_model} with provider='auto'")
 
 
 
 
46
 
47
  # In-memory caches (1-hour TTL)
48
  raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
@@ -97,9 +101,9 @@ async def process_pdf_page(img, page_idx):
97
  return ""
98
 
99
  async def process_with_qwen(filename: str, raw_text: str):
100
- """Process raw text with Qwen2-7B-Instruct or fallback via Hugging Face Inference API."""
101
  start_time = time.time()
102
- logger.info(f"Starting Qwen API processing for {filename}, {log_memory_usage()}")
103
 
104
  # Check structured data cache
105
  text_hash = get_text_hash(raw_text)
@@ -144,11 +148,11 @@ async def process_with_qwen(filename: str, raw_text: str):
144
  json_str = llm_output[json_start:json_end]
145
  structured_data = json.loads(json_str)
146
  structured_data_cache[text_hash] = structured_data
147
- logger.info(f"Qwen API processing for {filename}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
148
  return structured_data
149
  except Exception as e:
150
- logger.error(f"Qwen API processing failed for {filename}: {str(e)}, {log_memory_usage()}")
151
- return {"error": f"Qwen API processing failed: {str(e)}"}
152
 
153
  @app.post("/ocr")
154
  async def extract_and_structure(files: List[UploadFile] = File(...)):
@@ -263,7 +267,7 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
263
  except Exception as e:
264
  logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")
265
 
266
- # Process with Qwen API
267
  structured_data = await process_with_qwen(file.filename, raw_text)
268
  success_count += 1
269
  output_json["data"].append({
 
34
  raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
35
 
36
  # Initialize Hugging Face Inference Client with primary and fallback models
37
+ primary_model = "mistral/Mixtral-8x7B-Instruct-v0.1"
38
+ fallback_model = "Qwen/Qwen2-7B-Instruct"
39
  try:
40
  client = InferenceClient(model=primary_model, token=hf_token, provider="auto")
41
  logger.info(f"Hugging Face Inference Client initialized for {primary_model} with provider='auto'")
42
  except Exception as e:
43
  logger.warning(f"Failed to initialize client for {primary_model}: {str(e)}. Falling back to {fallback_model}")
44
+ try:
45
+ client = InferenceClient(model=fallback_model, token=hf_token, provider="hf-inference")
46
+ logger.info(f"Hugging Face Inference Client initialized for {fallback_model} with provider='hf-inference'")
47
+ except Exception as e:
48
+ logger.error(f"Failed to initialize client for {fallback_model}: {str(e)}")
49
+ raise HTTPException(status_code=500, detail=f"Failed to initialize Inference Client: {str(e)}")
50
 
51
  # In-memory caches (1-hour TTL)
52
  raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
 
101
  return ""
102
 
103
  async def process_with_qwen(filename: str, raw_text: str):
104
+ """Process raw text with LLM via Hugging Face Inference API."""
105
  start_time = time.time()
106
+ logger.info(f"Starting LLM API processing for {filename}, {log_memory_usage()}")
107
 
108
  # Check structured data cache
109
  text_hash = get_text_hash(raw_text)
 
148
  json_str = llm_output[json_start:json_end]
149
  structured_data = json.loads(json_str)
150
  structured_data_cache[text_hash] = structured_data
151
+ logger.info(f"LLM API processing for {filename}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
152
  return structured_data
153
  except Exception as e:
154
+ logger.error(f"LLM API processing failed for {filename}: {str(e)}, {log_memory_usage()}")
155
+ return {"error": f"LLM API processing failed: {str(e)}"}
156
 
157
  @app.post("/ocr")
158
  async def extract_and_structure(files: List[UploadFile] = File(...)):
 
267
  except Exception as e:
268
  logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")
269
 
270
+ # Process with LLM API
271
  structured_data = await process_with_qwen(file.filename, raw_text)
272
  success_count += 1
273
  output_json["data"].append({