Seth0330 committed on
Commit
32c001b
·
verified ·
1 Parent(s): d0cfc3b

Update backend/app/openrouter_client.py

Browse files
Files changed (1) hide show
  1. backend/app/openrouter_client.py +17 -5
backend/app/openrouter_client.py CHANGED
@@ -334,7 +334,7 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
334
  except ImportError:
335
  raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
336
 
337
- client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN)
338
 
339
  prompt = (
340
  f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
@@ -345,15 +345,18 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
345
  print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
346
 
347
  try:
348
- # HF Inference API for vision models - use image-to-text or chat completion
349
- # For vision models, we need to use the chat completion format
 
 
 
350
  result = client.chat_completion(
351
  messages=[
352
  {
353
  "role": "user",
354
  "content": [
355
  {"type": "text", "text": prompt},
356
- {"type": "image", "image": image_bytes}
357
  ]
358
  }
359
  ],
@@ -363,7 +366,16 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
363
  # Extract response text
364
  if isinstance(result, dict):
365
  if "choices" in result and len(result["choices"]) > 0:
366
- response_text = result["choices"][0].get("message", {}).get("content", "")
 
 
 
 
 
 
 
 
 
367
  else:
368
  response_text = result.get("generated_text", str(result))
369
  elif isinstance(result, str):
 
334
  except ImportError:
335
  raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
336
 
337
+ client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN, timeout=180.0)
338
 
339
  prompt = (
340
  f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
 
345
  print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
346
 
347
  try:
348
+ # Convert image bytes to base64 data URL for HuggingFace API
349
+ image_base64 = base64.b64encode(image_bytes).decode('utf-8')
350
+ image_data_url = f"data:image/jpeg;base64,{image_base64}"
351
+
352
+ # HF Inference API for vision models - use chat completion with base64 image
353
  result = client.chat_completion(
354
  messages=[
355
  {
356
  "role": "user",
357
  "content": [
358
  {"type": "text", "text": prompt},
359
+ {"type": "image", "image": image_data_url} # Use base64 data URL, not raw bytes
360
  ]
361
  }
362
  ],
 
366
  # Extract response text
367
  if isinstance(result, dict):
368
  if "choices" in result and len(result["choices"]) > 0:
369
+ message = result["choices"][0].get("message", {})
370
+ if isinstance(message.get("content"), list):
371
+ # Content might be a list of content blocks
372
+ response_text = "".join(
373
+ item.get("text", "")
374
+ for item in message["content"]
375
+ if item.get("type") == "text"
376
+ )
377
+ else:
378
+ response_text = message.get("content", "")
379
  else:
380
  response_text = result.get("generated_text", str(result))
381
  elif isinstance(result, str):