Seth0330 committed on
Commit
d24a0cf
·
verified ·
1 Parent(s): 92314c4

Create backend/app/openrouter_client.py

Browse files
Files changed (1) hide show
  1. backend/app/openrouter_client.py +112 -0
backend/app/openrouter_client.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ from typing import Any, Dict
5
+
6
+ import httpx
7
+
8
+ # OpenRouter API key, read from the environment at import time (to be configured in Hugging Face later)
9
+ OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
10
+ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"
11
+ MODEL_NAME = "qwen/qwen3-vl-235b-a22b-instruct"
12
+
13
+
14
def _file_to_image_block(file_bytes: bytes, content_type: str) -> Dict[str, Any]:
    """
    Encode the file as a base64 data URL wrapped in a chat-completions image part.

    For demo purposes PDFs and images are treated the same way here.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        content_type: MIME type placed in the data URL (e.g. "image/png").
            An empty or None value falls back to "application/octet-stream"
            so the data URL never contains the literal text "None".

    Returns:
        A content part shaped as
        {"type": "image_url", "image_url": {"url": "data:<mime>;base64,<payload>"}},
        which is the image part format of the chat-completions message schema.
    """
    # NOTE(review): a PDF sent as an image part may be rejected by the model;
    # OpenRouter documents a separate "file" content part for PDFs — confirm
    # before relying on PDF uploads.
    mime_type = content_type or "application/octet-stream"
    # Base64 output is pure ASCII, so decoding as ASCII is always safe.
    encoded = base64.b64encode(file_bytes).decode("ascii")
    return {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{encoded}"},
    }
24
+
25
+
26
def _parse_model_json(text: str) -> Any:
    """Parse the model reply as JSON, tolerating code fences or prose around it."""
    cleaned = text.strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Models often wrap the JSON in ```json fences or add a sentence around
        # it; retry on the outermost {...} span before giving up.
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(cleaned[start : end + 1])


async def extract_fields_from_document(
    file_bytes: bytes,
    content_type: str,
    filename: str,
) -> Dict[str, Any]:
    """
    Call OpenRouter with Qwen3-VL and return parsed JSON with fields.

    The model is instructed to return JSON only; the reply is still parsed
    leniently because models may wrap the JSON in Markdown fences.

    Args:
        file_bytes: Raw bytes of the uploaded document.
        content_type: MIME type of the document, used to build the data URL.
        filename: Original file name. Currently not used by this function.

    Returns:
        The JSON value decoded from the model's reply (expected to be an
        object with "doc_type", "confidence" and "fields").

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is not set.
        httpx.HTTPStatusError: If OpenRouter answers with an error status.
        json.JSONDecodeError: If no JSON can be decoded from the reply
            (including an empty reply).
    """
    if not OPENROUTER_API_KEY:
        raise RuntimeError("OPENROUTER_API_KEY environment variable is not set")

    image_block = _file_to_image_block(file_bytes, content_type)

    system_prompt = (
        "You are a document extraction engine. "
        "You analyze invoices, receipts, contracts, reports and similar documents, "
        "and output structured JSON only (no explanations or comments)."
    )

    user_prompt = (
        "Extract important key-value pairs from the document and respond with JSON only.\n"
        "Use this shape:\n"
        "{\n"
        ' \"doc_type\": \"invoice | receipt | contract | report | other\",\n'
        ' \"confidence\": number between 0 and 100,\n'
        ' \"fields\": {\n'
        ' \"invoice_number\": \"...\",\n'
        ' \"date\": \"...\",\n'
        ' \"due_date\": \"...\",\n'
        ' \"total_amount\": \"...\",\n'
        ' \"currency\": \"...\",\n'
        ' \"vendor_name\": \"...\",\n'
        ' \"line_items\": [\n'
        ' {\"description\": \"...\", \"quantity\": \"...\", \"unit_price\": \"...\", \"line_total\": \"...\"}\n'
        ' ],\n'
        ' \"other_field\": \"...\"\n'
        " }\n"
        "}\n"
        "If fields are missing or not applicable, simply omit them."
    )

    payload: Dict[str, Any] = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    image_block,
                ],
            },
        ],
        "max_tokens": 2048,
    }

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        # Optional attribution headers
        "HTTP-Referer": os.environ.get(
            "APP_URL",
            "https://huggingface.co/spaces/your-space",
        ),
        "X-Title": "Document Capture Demo",
    }

    async with httpx.AsyncClient(timeout=120) as client:
        resp = await client.post(OPENROUTER_BASE_URL, headers=headers, json=payload)
        resp.raise_for_status()
        data = resp.json()

    # OpenRouter returns choices[0].message.content
    content = data["choices"][0]["message"]["content"]

    # content may be a string or a list of content blocks
    if isinstance(content, list):
        text = "".join(part.get("text", "") for part in content if part.get("type") == "text")
    else:
        # A null content is treated as an empty reply so it surfaces as a
        # JSON decoding error instead of an opaque TypeError.
        text = content or ""

    # Try to parse JSON from the model output
    return _parse_model_json(text)