Spaces:

Seth0330
/

AIEXTRACT1

Running

App Files Files Community

AIEXTRACT1 / backend /app /openrouter_client.py

Seth0330

Create backend/app/openrouter_client.py

d24a0cf verified 18 days ago

raw

history blame

3.69 kB

	import os
	import base64
	import json
	from typing import Any, Dict

	import httpx

	# --- OpenRouter configuration -------------------------------------------
	# The API key is read from the environment once, at import time; it is None
	# when the variable is absent (e.g. before the Space secret is configured).
	OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
	# Full chat-completions endpoint that requests are POSTed to.
	OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"
	# Model identifier sent in the "model" field of each request.
	MODEL_NAME = "qwen/qwen3-vl-235b-a22b-instruct"


	def _file_to_image_block(file_bytes: bytes, content_type: str) -> Dict[str, Any]:
	"""
	Encode the file as a data URL to feed into the multimodal model.
	For demo purposes we treat PDFs and images the same way here.
	"""
	b64 = base64.b64encode(file_bytes).decode("utf-8")
	return {
	"type": "input_image",
	"image_url": f"data:{content_type};base64,{b64}",
	}


	def _parse_json_reply(content: Any) -> Any:
	    """Convert the assistant message content into parsed JSON."""
	    # content may be a plain string or a list of typed content blocks;
	    # for a list, only the "text" blocks are concatenated.
	    if isinstance(content, list):
	        text = "".join(
	            part.get("text", "") for part in content if part.get("type") == "text"
	        )
	    else:
	        text = content

	    if isinstance(text, str):
	        text = text.strip()
	        # Models frequently wrap JSON in a Markdown code fence even when
	        # told not to; drop the opening fence line and the closing fence.
	        if text.startswith("```"):
	            newline_at = text.find("\n")
	            text = text[newline_at + 1:] if newline_at != -1 else text[3:]
	            text = text.rstrip()
	            if text.endswith("```"):
	                text = text[:-3]

	    return json.loads(text)


	async def extract_fields_from_document(
	    file_bytes: bytes,
	    content_type: str,
	    filename: str,
	) -> Dict[str, Any]:
	    """
	    Call OpenRouter with Qwen3-VL and return parsed JSON with fields.

	    The document is sent as a base64 data URL alongside a prompt that
	    instructs the model to answer with JSON only; the reply text is then
	    parsed with ``json.loads`` (after removing a surrounding Markdown code
	    fence, if present).

	    Args:
	        file_bytes: Raw contents of the uploaded document.
	        content_type: MIME type of the document, used in the data URL.
	        filename: Original file name. Not used by this function; kept so
	            the signature stays stable for callers.

	    Returns:
	        The JSON value produced by the model, parsed into Python objects.

	    Raises:
	        RuntimeError: If ``OPENROUTER_API_KEY`` is not set.
	        httpx.HTTPStatusError: If OpenRouter answers with an error status.
	        KeyError, IndexError: If the response body lacks
	            ``choices[0].message.content``.
	        json.JSONDecodeError: If the model output is not valid JSON.
	    """
	    if not OPENROUTER_API_KEY:
	        raise RuntimeError("OPENROUTER_API_KEY environment variable is not set")

	    image_block = _file_to_image_block(file_bytes, content_type)

	    system_prompt = (
	        "You are a document extraction engine. "
	        "You analyze invoices, receipts, contracts, reports and similar documents, "
	        "and output structured JSON only (no explanations or comments)."
	    )

	    # The template uses plain "|" separators: "\|" is not a valid Python
	    # escape and would leak literal backslashes into the prompt.
	    user_prompt = (
	        "Extract important key-value pairs from the document and respond with JSON only.\n"
	        "Use this shape:\n"
	        "{\n"
	        ' "doc_type": "invoice | receipt | contract | report | other",\n'
	        ' "confidence": number between 0 and 100,\n'
	        ' "fields": {\n'
	        ' "invoice_number": "...",\n'
	        ' "date": "...",\n'
	        ' "due_date": "...",\n'
	        ' "total_amount": "...",\n'
	        ' "currency": "...",\n'
	        ' "vendor_name": "...",\n'
	        ' "line_items": [\n'
	        ' {"description": "...", "quantity": "...", "unit_price": "...", "line_total": "..."}\n'
	        ' ],\n'
	        ' "other_field": "..."\n'
	        " }\n"
	        "}\n"
	        "If fields are missing or not applicable, simply omit them."
	    )

	    payload: Dict[str, Any] = {
	        "model": MODEL_NAME,
	        "messages": [
	            {
	                "role": "system",
	                "content": [{"type": "text", "text": system_prompt}],
	            },
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": user_prompt},
	                    image_block,
	                ],
	            },
	        ],
	        "max_tokens": 2048,
	    }

	    headers = {
	        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
	        "Content-Type": "application/json",
	        # Optional attribution headers
	        "HTTP-Referer": os.environ.get(
	            "APP_URL",
	            "https://huggingface.co/spaces/your-space",
	        ),
	        "X-Title": "Document Capture Demo",
	    }

	    # Generous timeout: the request carries the whole document and waits
	    # for the full (non-streamed) completion.
	    async with httpx.AsyncClient(timeout=120) as client:
	        resp = await client.post(OPENROUTER_BASE_URL, headers=headers, json=payload)
	        resp.raise_for_status()
	        data = resp.json()

	    # OpenRouter returns choices[0].message.content
	    content = data["choices"][0]["message"]["content"]

	    # Try to parse JSON from the model output
	    return _parse_json_reply(content)