Spaces:
Sleeping
Sleeping
Update backend/app/openrouter_client.py
Browse files
backend/app/openrouter_client.py
CHANGED
|
@@ -334,7 +334,7 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
|
|
| 334 |
except ImportError:
|
| 335 |
raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
|
| 336 |
|
| 337 |
-
client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN)
|
| 338 |
|
| 339 |
prompt = (
|
| 340 |
f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
|
|
@@ -345,15 +345,18 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
|
|
| 345 |
print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
|
| 346 |
|
| 347 |
try:
|
| 348 |
-
#
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
| 350 |
result = client.chat_completion(
|
| 351 |
messages=[
|
| 352 |
{
|
| 353 |
"role": "user",
|
| 354 |
"content": [
|
| 355 |
{"type": "text", "text": prompt},
|
| 356 |
-
{"type": "image", "image":
|
| 357 |
]
|
| 358 |
}
|
| 359 |
],
|
|
@@ -363,7 +366,16 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
|
|
| 363 |
# Extract response text
|
| 364 |
if isinstance(result, dict):
|
| 365 |
if "choices" in result and len(result["choices"]) > 0:
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
else:
|
| 368 |
response_text = result.get("generated_text", str(result))
|
| 369 |
elif isinstance(result, str):
|
|
|
|
| 334 |
except ImportError:
|
| 335 |
raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
|
| 336 |
|
| 337 |
+
client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN, timeout=180.0)
|
| 338 |
|
| 339 |
prompt = (
|
| 340 |
f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
|
|
|
|
| 345 |
print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
|
| 346 |
|
| 347 |
try:
|
| 348 |
+
# Convert image bytes to base64 data URL for HuggingFace API
|
| 349 |
+
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
| 350 |
+
image_data_url = f"data:image/jpeg;base64,{image_base64}"
|
| 351 |
+
|
| 352 |
+
# HF Inference API for vision models - use chat completion with base64 image
|
| 353 |
result = client.chat_completion(
|
| 354 |
messages=[
|
| 355 |
{
|
| 356 |
"role": "user",
|
| 357 |
"content": [
|
| 358 |
{"type": "text", "text": prompt},
|
| 359 |
+
{"type": "image", "image": image_data_url} # Use base64 data URL, not raw bytes
|
| 360 |
]
|
| 361 |
}
|
| 362 |
],
|
|
|
|
| 366 |
# Extract response text
|
| 367 |
if isinstance(result, dict):
|
| 368 |
if "choices" in result and len(result["choices"]) > 0:
|
| 369 |
+
message = result["choices"][0].get("message", {})
|
| 370 |
+
if isinstance(message.get("content"), list):
|
| 371 |
+
# Content might be a list of content blocks
|
| 372 |
+
response_text = "".join(
|
| 373 |
+
item.get("text", "")
|
| 374 |
+
for item in message["content"]
|
| 375 |
+
if item.get("type") == "text"
|
| 376 |
+
)
|
| 377 |
+
else:
|
| 378 |
+
response_text = message.get("content", "")
|
| 379 |
else:
|
| 380 |
response_text = result.get("generated_text", str(result))
|
| 381 |
elif isinstance(result, str):
|