import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import json, re
# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "LiquidAI/LFM2-350M-Extract"
# The tokenizer and model are created once at module load and reused by every
# extract_all() call. device_map="auto" leaves device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
# System message placed ahead of the user's text in every extract_all() call;
# it describes the JSON layout the model is asked to produce.
# NOTE(review): product/price/quantity appear at the same indent level as
# "orders" in this literal — confirm whether nested indentation was intended.
system_prompt = """Return data as a JSON object with the following schema:
- orders: list of objects:
- product: Product name
- price: Price as number without $ sign
- quantity: Number of items as integer"""
def clean_result(parsed):
    """Normalize the ``price`` field of every order to a float, in place.

    Currency formatting such as ``"$1,299.50"`` is stripped (dollar sign,
    thousands separators, surrounding whitespace) before conversion. Prices
    that still cannot be parsed as a number are left exactly as they were.

    Args:
        parsed: Decoded JSON value. Expected to be a dict with an ``"orders"``
            list of dicts, but any other shape is tolerated and returned
            untouched rather than raising.

    Returns:
        The same ``parsed`` object (mutated in place when it has the expected
        shape).
    """
    # Model output is untrusted: the top level may be a list, string, number...
    if not isinstance(parsed, dict):
        return parsed

    orders = parsed.get("orders", [])
    # "orders" may be null or a scalar when the model drifts from the schema.
    if not isinstance(orders, list):
        return parsed

    for order in orders:
        # Skip entries that are not objects; a bare string would otherwise
        # pass the `"price" in order` substring test and then fail on indexing.
        if not isinstance(order, dict) or "price" not in order:
            continue

        # "$29.99" -> "29.99", "1,299.50" -> "1299.50"
        price_text = str(order["price"]).replace("$", "").replace(",", "").strip()
        try:
            order["price"] = float(price_text)
        except ValueError:
            # Best effort: keep the original value when it is not numeric.
            pass

    return parsed
def extract_all(user_input):
    """Run the extraction model on ``user_input`` and return its answer as text.

    The input is wrapped in a chat conversation together with ``system_prompt``,
    the model generates up to 256 new tokens, and only the newly generated
    tokens are decoded. Markdown code fences are stripped from the reply.

    Args:
        user_input: Free-form text to extract order data from.

    Returns:
        Pretty-printed JSON (2-space indent, non-ASCII preserved) when the
        reply parses as JSON; JSON objects are first passed through
        ``clean_result``. If the reply is not valid JSON, the fence-stripped
        raw reply is returned unchanged.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True,
    ).to(model.device)
    # Remember the prompt length so the echo of the prompt can be sliced off.
    input_len = inputs["input_ids"].shape[1]

    # Greedy decoding. `temperature` is deliberately not passed: it only
    # applies to sampling and conflicts with do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False,
        repetition_penalty=1.3,
    )
    response = tokenizer.decode(
        outputs[0][input_len:], skip_special_tokens=True
    ).strip()
    # Drop ```json / ``` fences the model may wrap around its answer.
    response = re.sub(r'```json|```', '', response).strip()

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        # Not JSON at all: show the raw model reply instead of failing.
        return response

    # Only JSON objects follow the orders schema; lists/scalars are valid JSON
    # but must not be handed to clean_result, which expects a dict.
    if isinstance(parsed, dict):
        parsed = clean_result(parsed)
    return json.dumps(parsed, indent=2, ensure_ascii=False)
# UI wiring: one multi-line textbox in, one multi-line textbox out, with
# extract_all doing the work in between.
_input_box = gr.Textbox(label="Input Text", lines=8)
_output_box = gr.Textbox(label="Extracted JSON", lines=12)

demo = gr.Interface(fn=extract_all, inputs=_input_box, outputs=_output_box)

# Start the web app as soon as the module is executed.
demo.launch()