Update backend/app/openrouter_client.py

backend/app/openrouter_client.py  +61 -32
@@ -23,7 +23,7 @@ MODEL_NAME = "qwen/qwen3-vl-235b-a22b-instruct"
 # HuggingFace Inference API
 HF_TOKEN = os.environ.get("HF_TOKEN")
 HF_INFERENCE_API_URL = "https://api-inference.huggingface.co/models"
-HF_MODEL_NAME = os.environ.get("HF_MODEL_NAME", "Qwen/
+HF_MODEL_NAME = os.environ.get("HF_MODEL_NAME", "Qwen/Qwen3-VL-235B-A22B-Instruct") # Default HF model
 
 # Backend selection: "openrouter" or "huggingface"
 EXTRACTION_BACKEND = os.environ.get("EXTRACTION_BACKEND", "openrouter").lower()

@@ -325,17 +325,10 @@ async def _extract_with_openrouter_single(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
 
 
 async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
-    """Extract from a single page using HuggingFace Inference API."""
+    """Extract from a single page using HuggingFace Inference API (standard endpoint)."""
     if not HF_TOKEN:
         raise RuntimeError("HF_TOKEN environment variable is not set")
 
-    try:
-        from huggingface_hub import InferenceClient
-    except ImportError:
-        raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
-
-    client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN, timeout=180.0)
-
     prompt = (
         f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
         "Extract every word, number, and piece of information, including any non-English text. "

@@ -345,39 +338,70 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
     print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
 
     try:
-        #
+        # Use standard HuggingFace Inference API endpoint (not chat_completion/router)
+        api_url = f"{HF_INFERENCE_API_URL}/{HF_MODEL_NAME}"
+
+        # Convert image to base64
         image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        image_data_url = f"data:image/jpeg;base64,{image_base64}"
 
-        #
+        # For Qwen3-VL models, use the chat format through standard API
+        # The standard API accepts chat-completion format for compatible models
+        payload = {
+            "inputs": {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image", "image": f"data:image/jpeg;base64,{image_base64}"}
+                        ]
+                    }
+                ]
+            },
+            "parameters": {
+                "max_new_tokens": 2048,
+                "temperature": 0.1
+            }
+        }
 
+        headers = {
+            "Authorization": f"Bearer {HF_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
+        timeout = httpx.Timeout(180.0, connect=30.0)
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            print(f"[INFO] Making POST request to {api_url}...")
+            resp = await client.post(api_url, headers=headers, json=payload)
+            print(f"[INFO] Received response: Status {resp.status_code}")
+            resp.raise_for_status()
+            result = resp.json()
+
+        # Extract response text - format depends on model response
+        response_text = None
+        if isinstance(result, list) and len(result) > 0:
+            # Standard API often returns list with generated_text
+            response_text = result[0].get("generated_text", str(result[0]))
+        elif isinstance(result, dict):
+            # Check for different response formats
+            if "generated_text" in result:
+                response_text = result["generated_text"]
+            elif "text" in result:
+                response_text = result["text"]
+            elif "choices" in result and len(result["choices"]) > 0:
+                # Chat completion format
                 message = result["choices"][0].get("message", {})
+                content = message.get("content", "")
+                if isinstance(content, list):
                     response_text = "".join(
                         item.get("text", "")
-                        for item in
+                        for item in content
                         if item.get("type") == "text"
                     )
                 else:
-                    response_text =
+                    response_text = content
             else:
-                response_text =
+                response_text = str(result)
         elif isinstance(result, str):
             response_text = result
         else:

@@ -385,8 +409,13 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
 
         if not response_text:
             raise ValueError("Empty response from HuggingFace API")
+
+        print(f"[DEBUG] HuggingFace response preview: {response_text[:500]}")
 
         return _parse_model_response(response_text, page_num)
+    except httpx.HTTPStatusError as e:
+        print(f"[ERROR] HuggingFace API HTTP error: {e.response.status_code} - {e.response.text[:500]}")
+        raise RuntimeError(f"HuggingFace API error for page {page_num}: {e.response.status_code} - {str(e)}")
     except Exception as e:
         print(f"[ERROR] HuggingFace API error details: {type(e).__name__}: {str(e)}")
         raise RuntimeError(f"HuggingFace API error for page {page_num}: {str(e)}")
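For a quick local check of the new HuggingFace path, a minimal harness along these lines should work; it is a sketch, not part of the commit. The import path backend.app.openrouter_client is assumed from the file's location, page.jpg is a placeholder for any rendered page image, and HF_TOKEN must already be exported in the shell:

import asyncio
import os

# Assumed configuration; EXTRACTION_BACKEND and HF_MODEL_NAME are the env vars
# the module reads at import time. HF_TOKEN must already be set in the shell.
os.environ["EXTRACTION_BACKEND"] = "huggingface"
os.environ.setdefault("HF_MODEL_NAME", "Qwen/Qwen3-VL-235B-A22B-Instruct")

from backend.app import openrouter_client  # assumed import path


async def main() -> None:
    # "page.jpg" stands in for any rendered document page image.
    with open("page.jpg", "rb") as f:
        image_bytes = f.read()
    # _extract_with_hf raises RuntimeError if HF_TOKEN is missing or the API errors.
    result = await openrouter_client._extract_with_hf(image_bytes, page_num=1, total_pages=1)
    print(result)


if __name__ == "__main__":
    asyncio.run(main())

Setting the variables before the import matters because HF_TOKEN, HF_MODEL_NAME, and EXTRACTION_BACKEND are all read once at module load.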