Update app.py
Browse files
app.py
CHANGED
|
@@ -34,15 +34,19 @@ if not hf_token:
|
|
| 34 |
raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
|
| 35 |
|
| 36 |
# Initialize Hugging Face Inference Client with primary and fallback models
|
| 37 |
-
primary_model = "
|
| 38 |
-
fallback_model = "
|
| 39 |
try:
|
| 40 |
client = InferenceClient(model=primary_model, token=hf_token, provider="auto")
|
| 41 |
logger.info(f"Hugging Face Inference Client initialized for {primary_model} with provider='auto'")
|
| 42 |
except Exception as e:
|
| 43 |
logger.warning(f"Failed to initialize client for {primary_model}: {str(e)}. Falling back to {fallback_model}")
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# In-memory caches (1-hour TTL)
|
| 48 |
raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
|
|
@@ -97,9 +101,9 @@ async def process_pdf_page(img, page_idx):
|
|
| 97 |
return ""
|
| 98 |
|
| 99 |
async def process_with_qwen(filename: str, raw_text: str):
|
| 100 |
-
"""Process raw text with
|
| 101 |
start_time = time.time()
|
| 102 |
-
logger.info(f"Starting
|
| 103 |
|
| 104 |
# Check structured data cache
|
| 105 |
text_hash = get_text_hash(raw_text)
|
|
@@ -144,11 +148,11 @@ async def process_with_qwen(filename: str, raw_text: str):
|
|
| 144 |
json_str = llm_output[json_start:json_end]
|
| 145 |
structured_data = json.loads(json_str)
|
| 146 |
structured_data_cache[text_hash] = structured_data
|
| 147 |
-
logger.info(f"
|
| 148 |
return structured_data
|
| 149 |
except Exception as e:
|
| 150 |
-
logger.error(f"
|
| 151 |
-
return {"error": f"
|
| 152 |
|
| 153 |
@app.post("/ocr")
|
| 154 |
async def extract_and_structure(files: List[UploadFile] = File(...)):
|
|
@@ -263,7 +267,7 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
|
|
| 263 |
except Exception as e:
|
| 264 |
logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")
|
| 265 |
|
| 266 |
-
# Process with
|
| 267 |
structured_data = await process_with_qwen(file.filename, raw_text)
|
| 268 |
success_count += 1
|
| 269 |
output_json["data"].append({
|
|
|
|
| 34 |
raise HTTPException(status_code=500, detail="HF_TOKEN environment variable not set")
|
| 35 |
|
| 36 |
# Initialize Hugging Face Inference Client with primary and fallback models
|
| 37 |
+
primary_model = "mistral/Mixtral-8x7B-Instruct-v0.1"
|
| 38 |
+
fallback_model = "Qwen/Qwen2-7B-Instruct"
|
| 39 |
try:
|
| 40 |
client = InferenceClient(model=primary_model, token=hf_token, provider="auto")
|
| 41 |
logger.info(f"Hugging Face Inference Client initialized for {primary_model} with provider='auto'")
|
| 42 |
except Exception as e:
|
| 43 |
logger.warning(f"Failed to initialize client for {primary_model}: {str(e)}. Falling back to {fallback_model}")
|
| 44 |
+
try:
|
| 45 |
+
client = InferenceClient(model=fallback_model, token=hf_token, provider="hf-inference")
|
| 46 |
+
logger.info(f"Hugging Face Inference Client initialized for {fallback_model} with provider='hf-inference'")
|
| 47 |
+
except Exception as e:
|
| 48 |
+
logger.error(f"Failed to initialize client for {fallback_model}: {str(e)}")
|
| 49 |
+
raise HTTPException(status_code=500, detail=f"Failed to initialize Inference Client: {str(e)}")
|
| 50 |
|
| 51 |
# In-memory caches (1-hour TTL)
|
| 52 |
raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
|
|
|
|
| 101 |
return ""
|
| 102 |
|
| 103 |
async def process_with_qwen(filename: str, raw_text: str):
|
| 104 |
+
"""Process raw text with LLM via Hugging Face Inference API."""
|
| 105 |
start_time = time.time()
|
| 106 |
+
logger.info(f"Starting LLM API processing for {filename}, {log_memory_usage()}")
|
| 107 |
|
| 108 |
# Check structured data cache
|
| 109 |
text_hash = get_text_hash(raw_text)
|
|
|
|
| 148 |
json_str = llm_output[json_start:json_end]
|
| 149 |
structured_data = json.loads(json_str)
|
| 150 |
structured_data_cache[text_hash] = structured_data
|
| 151 |
+
logger.info(f"LLM API processing for {filename}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
|
| 152 |
return structured_data
|
| 153 |
except Exception as e:
|
| 154 |
+
logger.error(f"LLM API processing failed for {filename}: {str(e)}, {log_memory_usage()}")
|
| 155 |
+
return {"error": f"LLM API processing failed: {str(e)}"}
|
| 156 |
|
| 157 |
@app.post("/ocr")
|
| 158 |
async def extract_and_structure(files: List[UploadFile] = File(...)):
|
|
|
|
| 267 |
except Exception as e:
|
| 268 |
logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")
|
| 269 |
|
| 270 |
+
# Process with LLM API
|
| 271 |
structured_data = await process_with_qwen(file.filename, raw_text)
|
| 272 |
success_count += 1
|
| 273 |
output_json["data"].append({
|