"""Gradio demo: extract order data from free text as structured JSON.

Uses the LiquidAI/LFM2-350M-Extract model to turn unstructured order
descriptions into a JSON object matching a fixed schema, then normalizes
price fields to floats.
"""
import json
import re

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "LiquidAI/LFM2-350M-Extract"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

system_prompt = """Return data as a JSON object with the following schema:
- orders: list of objects:
  - product: Product name
  - price: Price as number without $ sign
  - quantity: Number of items as integer"""


def clean_result(parsed):
    """Normalize price fields in the parsed model output.

    Strips currency symbols and thousands separators from each order's
    "price" value and converts it to float (e.g. "$1,029.99" -> 1029.99).
    Values that still fail float() are left untouched. Non-dict input
    (the model may emit a top-level JSON list) is returned unchanged.
    """
    if not isinstance(parsed, dict):
        return parsed
    for order in parsed.get("orders", []):
        if "price" in order:
            # "$29.99" -> 29.99
            price = str(order["price"]).replace("$", "").replace(",", "").strip()
            try:
                order["price"] = float(price)
            except ValueError:
                pass  # keep the original value rather than crash on odd formats
    return parsed


def extract_all(user_input):
    """Run the extraction model on user text and return pretty-printed JSON.

    Falls back to the raw model response when it is not valid JSON.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True,
    ).to(model.device)
    input_len = inputs["input_ids"].shape[1]

    # Greedy decoding; temperature is meaningless (and rejected by newer
    # transformers versions) when do_sample=False, so it is not passed.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False,
        repetition_penalty=1.3,
    )

    # Decode only the newly generated tokens, not the prompt.
    response = tokenizer.decode(
        outputs[0][input_len:],
        skip_special_tokens=True,
    ).strip()

    # Strip markdown code fences the model sometimes wraps around the JSON.
    response = re.sub(r'```json|```', '', response).strip()

    try:
        parsed = json.loads(response)
        parsed = clean_result(parsed)  # normalize price strings to floats
        return json.dumps(parsed, indent=2, ensure_ascii=False)
    except json.JSONDecodeError:
        # Not valid JSON: show the raw model output so the user can inspect it.
        return response


demo = gr.Interface(
    fn=extract_all,
    inputs=gr.Textbox(label="Input Text", lines=8),
    outputs=gr.Textbox(label="Extracted JSON", lines=12),
)

if __name__ == "__main__":
    demo.launch()