Spaces:

aifakepro
/

LFM

Sleeping

File size: 1,920 Bytes

897ace2
 
980b14c
897ace2
9bb9154
897ace2
 
 
00de7fe
79d2b6d
 
 
00de7fe
980b14c
10d6469
 
 
 
 
 
 
 
 
 
 
980b14c
79d2b6d
 
 
 
 
 
 
 
 
980b14c
79d2b6d
 
 
 
 
 
 
 
 
 
 
980b14c
79d2b6d
 
10d6469
79d2b6d
 
 
897ace2
 
980b14c
 
 
897ace2

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import json, re

# Hugging Face Hub identifier shared by the tokenizer and the model below.
model_id = "LiquidAI/LFM2-350M-Extract"
# Both objects are created once at import time and reused by every
# extract_all() call.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" delegates device placement to the library; extract_all()
# moves its inputs to model.device to match.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# System message sent with every request: it describes the JSON schema
# (a list of orders with product, price and quantity) the model is asked
# to produce.
system_prompt = """Return data as a JSON object with the following schema:
- orders: list of objects:
  - product: Product name
  - price: Price as number without $ sign
  - quantity: Number of items as integer"""

def clean_result(parsed):
    """Normalize the ``price`` field of every extracted order to a float.

    For each dict found in ``parsed["orders"]`` that has a ``price`` key,
    the value is stringified, stripped of ``$`` signs, thousands commas and
    surrounding whitespace, and converted with ``float``. Values that do
    not convert to a finite number are left exactly as they were.

    Args:
        parsed: Decoded JSON value. Anything that is not a dict holding a
            list (or tuple) under ``"orders"`` is returned unchanged, and
            non-dict entries inside ``orders`` are skipped, so unexpected
            model output never raises here.

    Returns:
        The same ``parsed`` object; order dicts are modified in place.
    """
    import math

    if not isinstance(parsed, dict):
        return parsed

    orders = parsed.get("orders")
    if not isinstance(orders, (list, tuple)):
        return parsed

    for order in orders:
        if not isinstance(order, dict) or "price" not in order:
            continue
        # "$29.99" -> 29.99
        text = str(order["price"]).replace("$", "").replace(",", "").strip()
        try:
            value = float(text)
        except ValueError:
            # Not numeric (e.g. "free" or None): keep the original value.
            continue
        # float() also accepts "nan"/"inf", which json.dumps would emit as
        # non-standard JSON; treat those like any other unparseable price.
        if math.isfinite(value):
            order["price"] = value
    return parsed

def extract_all(user_input):
    """Run the extraction model on ``user_input`` and return its JSON output.

    A chat prompt is built from the module-level ``system_prompt`` and the
    user text, up to 256 new tokens are generated with greedy decoding, and
    Markdown code fences are stripped from the decoded completion.

    Args:
        user_input: Free text to extract orders from.

    Returns:
        The completion re-serialized as indented JSON (with prices
        normalized by ``clean_result``) when it parses as JSON; otherwise
        the raw completion text so the caller can still see what the
        model produced.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # Greedy decoding. `temperature` only applies when sampling, so it is
    # not passed together with do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False,
        repetition_penalty=1.3,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
    # Drop Markdown code fences the model may wrap around the JSON.
    response = re.sub(r'```json|```', '', response).strip()

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        return response

    try:
        parsed = clean_result(parsed)
    except (AttributeError, TypeError):
        # Valid JSON with an unexpected shape (e.g. a top-level list or a
        # null "orders"): price normalization is best-effort, so show the
        # JSON as parsed instead of failing the request.
        pass
    return json.dumps(parsed, indent=2, ensure_ascii=False)

# Gradio UI: a multi-line text box for the input text and another one that
# shows whatever extract_all() returns.
_input_box = gr.Textbox(lines=8, label="Input Text")
_output_box = gr.Textbox(lines=12, label="Extracted JSON")
demo = gr.Interface(fn=extract_all, inputs=_input_box, outputs=_output_box)

# Start serving the interface as soon as the module is executed.
demo.launch()