import asyncio
import os
import time
from functools import partial

import torch
from transformers import AutoProcessor, AutoModelForImageTextToText

model_name = "Qwen/Qwen3.5-9B-Base"
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

# The processor and model are loaded once at import time and shared by
# every call to ``call_llm`` / ``execute_llm``.
processor = AutoProcessor.from_pretrained(
    model_name,
    token=hf_token,
    trust_remote_code=True,
)

model = AutoModelForImageTextToText.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.float16,
    token=hf_token,
    trust_remote_code=True,
)

# Start-up diagnostics: report where the model ended up.
print("CUDA available:", torch.cuda.is_available())
print("Model device:", model.device)
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
    print("Memory allocated:", torch.cuda.memory_allocated() / 1e9, "GB")


# Built-in extraction instructions, used whenever the caller supplies no
# prompt of their own. The text (including its line breaks and spacing) is
# sent to the model verbatim, so it is kept exactly as authored.
DEFAULT_EXTRACTION_PROMPT = r"""You convert unstructured text into a structured JSON object. OUTPUT ONLY VALID JSON. NO extra words. NO explanation. NO commentary. GOAL - Extract required data points as key–value pairs. - Organize data into top-level sections only. - Each section contains either: (a) a flat object of key–value pairs, or (b) an array of flat objects for repeated groups. - Depth limit = 2 (root → section → object/array of objects). Never nest sections inside other sections. NAMING RULES - Use the exact section names and key names defined below. - Keys MUST NOT contain the dot character ".". - Maintain human-readable, normalized whitespace in key names. - Never rename keys, never add extra keys. DATA TYPING - Use JSON native types: - Numbers as numbers. - IDs or codes with leading zeros as strings. - Dates as "YYYY-MM-DD" if clearly parseable; otherwise keep original text. - Times as "HH:MM" 24-hour if clear. - If a value exists but is unreadable, return "" (not null). - NEVER infer any missing value. 
NOTIFICATION NUMBER RULE (IMPORTANT) For “CB Code” fields (e.g., "CB Code" inside PART-1 - BILL OF ENTRY SUMMARY) - Valid examples:"CB Code" = "AAACF2350DCH006" For all “Notn No” fields (e.g., "IGST","G. CESS" inside Item Duty): - Valid examples:"IGST" = "021/2023", "G CESS" = "001/2017". - Return "IGST" and "G CESS" can not be null or Empty For all “Notn SNo” fields (e.g., "IGST", "G CESS" inside Item Duty): - Valid examples:"IGST" = "1" or "III70", "G CESS" = "56". For all “Rate” fields (e.g., BCD, SWS inside Item Duty): - Valid examples:BCD = "7.5", SWS = "10". For all “Notn No” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC = "011/2021". For all “Notn SNo” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC = "18". For all “Amount” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC = "0". - Return Exact value or can be 0 or Empty For all “Duty Fg” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC = "4437540". - Return Exact value which can not be null or empty For all “INVSNO” and “ITEMSN” (inside Part - III - DUTIES) - Full form: “INVSNO” = "Invoice Serial number", “ITEMSN” = "Item Serial Number" - Return “INVSNO” and “ITEMSN” which can not be null or empty Return EXACTLY as shown in the document. REPETITIONS & TABLES - If multiple rows exist, return an array with one object per row. - Each object must remain FLAT. - Do not nest objects deeper than allowed. - Keep column order exactly as defined. MANDATORY BEHAVIOR - Detect which PART the document belongs to. - Output ONLY the JSON object for that PART. - If data for a required field is missing, return "". 
============================================= PART-1 — BILL OF ENTRY SUMMARY ============================================= If the document corresponds to PART-1, output: { "PART-1 - BILL OF ENTRY SUMMARY": { "Port Code": "", "BE NO": "", "BE Date": "", "BE TYPE": "", "IEC/Br": "", "CB Code": "", "Mode": "", "DEF BE": "", "ASSESS": "", "EXAM": "", "PROV/FINAL": "", "COUNTRY OF ORIGIN": "", "PORT OF LOADING": "", "PORT OF SHIPMENT": "", "IMPORTER NAME & ADDRESS": "", "AD CODE": "", "CB NAME": "", "BCD": "", "SWS": "", "CVD":"", "IGST": "", "TOT ASS VALUE": "", "TOTAL DUTY": "", "INT": "", "PNLTY": "", "FINE": "", "TOT AMOUNT": "", "Submission": "", "ASSESSMENT": "", "EXCHNAGE RATE": "", "OOC NO": "", "OOC DATE": "" }, "MANIFEST DETAILS": [ { "IGM NO": "", "IGM DATE": "", "INW DATE": "", "MAWB NO": "", "DATE": "", "HAWB NO": "", "HAWB DATE": "", "PKG": "", "GW": "" } ], "BOND DETAILS": [ { "BOND NO": "", "PORT": "", "BOND CD": "", "DEBT AMT": "" } ], "Payment Details": [ { "SR NO": "", "CHALLAN NO": "", "PAID ON": "", "AMOUNT(RS)": "" } ], "CONTAINER DETAILS": [ { "S NO": "", "LCL/FCL": "", "CONTAINER NUMBER": "" } ], "INVOICE DETAILS-SUMMERY": [ { "S NO":"", "INVOICE NO":"", "INV AMT": "", "CUR":"" } ] } ============================================ PART-II — INVOICE & VALUATION DETAILS ============================================ { "PART -II - INVOICE & VALUATION DETAILS": {}, "Details": { "SUPPLIER NAME & ADDRESS": "", "FREIGHT": "", "INSURANCE": "", "LOADING": "", "COMMN": "", "VALUATION METHOD": "", "Cur": "", "Term": "", "REL TD": "", "SVB CH": "", "SVB NO": "", "DATE": "" }, "INVOICE": [ { "S NO": "", "INVOICE No Dt": "" } ], "Item Details": [ { "S NO": "", "CTH": "", "DESCRIPTION": "", "UNIT PRICE": "", "QUANTITY": "", "UQC": "", "AMOUNT": "" } ] } ==================================== PART-III — DUTIES ==================================== ✔ Item Duty section FIXED, simplified, corrected ✔ Each table is ONE row ✔ Field order fixed ✔ Dot-free keys ✔ No 
duplication { "Part - III - DUTIES": {}, "Item Details": [ { "INVSNO": "", "ITEMSN": "", "CTH": "", "ITEM DESCRIPTION": "", "COO": "", "C_QTY": "", "C_UQC": "", "S_QTY": "", "S_UQC": "", "SCH": "", "END USE": "", "ASSESS VALUE": "", "TOTAL DUTY": "" } ], "Item Duty - Notn No": [ { "INVSNO": "", "ITEMSN": "", "BCD": "", "SWS": "", "IGST": "", "G CESS": "", "ADD": "", "T VALUE": "" } ], "Item Duty - Notn SNo": [ { "INVSNO": "", "ITEMSN": "", "BCD": "", "SWS": "", "IGST": "", "G CESS": "", "ADD": "", "T VALUE": "" } ], "Item Duty - Rate": [ { "INVSNO": "", "ITEMSN": "", "BCD": "", "SWS": "", "IGST": "", "G CESS": "", "ADD": "", "T VALUE": "" } ], "Item Duty - Amount": [ { "INVSNO": "", "ITEMSN": "", "BCD": "", "SWS": "", "IGST": "", "G CESS": "", "ADD": "", "T VALUE": "" } ], "Item Duty - Duty Fg": [ { "INVSNO": "", "ITEMSN": "", "BCD": "", "SWS": "", "IGST": "", "G CESS": "", "ADD": "", "T VALUE": "" } ], "Other Duty - Notn No": [ { "INVSNO": "", "ITEMSN": "", "CAIDC": "" } ], "Other Duty - Notn SNo": [ { "INVSNO": "", "ITEMSN": "", "CAIDC": "" } ], "Other Duty - Rate": [ { "INVSNO": "", "ITEMSN": "", "CAIDC": "" } ], "Other Duty - Amount": [ { "INVSNO": "", "ITEMSN": "", "CAIDC": "" } ], "Other Duty - Duty Fg": [ { "INVSNO": "", "ITEMSN": "", "CAIDC": "" } ] } ======================================== PART-IV — ADDITIONAL DETAILS ======================================== { "PART - IV - ADDITIONAL DETAILS": {}, "LICENSE DETAILS": [ { "INVSNO": "", "ITMSNO": "", "LIC SLNO": "", "LIC NO": "", "LIC DATE": "", "CODE": "", "PORT": "", "DEBIT VALUE": "", "QTY": "", "UQC": "", "DEBIT DUTY": "" } ] } ======================================== PART-V — OTHER COMPLIANCES ======================================== { "PART - V - OTHER COMPLIANCES": { "EXAMINATION ORDER RMS": "", "EXAMINATION ORDER": "", "PGA EXAMINATION INSTRUCTIONS": "", "EXAMINATION REPORT": "", "SUPERINTENDENT COMMENTS": "" } } ========================================== PART-VI — DECLARATIONS 
========================================== { "PART-VI- DECLARATIONS": "None" } ========================================== FINAL RULES ========================================== - Detect document PART. - Return ONLY that PART’s JSON. - Output MUST be valid JSON. - No comments or text outside JSON. - No dots in keys. - Do NOT infer values; use "" when missing."""


def execute_llm(
    model,
    processor,
    image,
    prompt: str = "",
    *,
    max_new_tokens: int = 512,
) -> str:
    """Run one greedy generation pass over *image* and return the decoded text.

    Args:
        model: Loaded generation model; must expose ``generate`` and ``device``.
        processor: Matching processor; called to build the inputs and used to
            decode the generated ids.
        image: The image handed to ``processor`` as the single entry of
            ``images``.
        prompt: Instruction text placed in the user turn. When empty (the
            default), ``DEFAULT_EXTRACTION_PROMPT`` is used. Previously this
            argument was accepted but ignored.
        max_new_tokens: Upper bound on generated tokens (defaults to the
            historical value of 512).
            NOTE(review): 512 may be too small for the larger PART schemas;
            raise it here if the returned JSON comes back truncated.

    Returns:
        The generated continuation only (prompt tokens are sliced off),
        decoded with special tokens removed.
    """
    instruction = prompt or DEFAULT_EXTRACTION_PROMPT

    # The chat markup is assembled by hand rather than via a chat template.
    # NOTE(review): confirm that a bare <|image_pad|> is the image placeholder
    # this processor expects for the chosen checkpoint.
    full_prompt = f"<|im_start|>user\n<|image_pad|>\n{instruction}<|im_end|>\n<|im_start|>assistant\n"

    with torch.inference_mode():
        inputs = processor(
            text=[full_prompt],
            images=[image],
            return_tensors="pt",
        )

        # Move every tensor the processor produced onto the model's device.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Greedy decoding (do_sample=False) keeps the output deterministic.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=processor.tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; keep only the new tokens.
        prompt_length = inputs["input_ids"].shape[1]
        generated_ids = outputs[0][prompt_length:]

        result = processor.decode(
            generated_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )

    return result


async def call_llm(image, prompt: str = ""):
    """Run ``execute_llm`` in the default executor and return its result.

    The blocking generation call is pushed to the event loop's default
    executor so that the loop itself is not blocked while the model runs.

    Args:
        image: Image forwarded to ``execute_llm``.
        prompt: Optional instruction text forwarded to ``execute_llm``; an
            empty string selects the built-in extraction prompt. Previously
            this argument was dropped and ``""`` was always forwarded.

    Returns:
        The text returned by ``execute_llm``.
    """
    print("call llm")
    started = time.perf_counter()  # monotonic clock: safe for elapsed time
    # Inside a coroutine the running loop is the one to use.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, execute_llm, model, processor, image, prompt
    )
    print('time taken = ', time.perf_counter() - started)
    return result