Spaces:

aifakepro
/

LFM

Sleeping

App Files Community

aifakepro committed on Mar 18

Commit

79d2b6d

verified ·

1 Parent(s): 980b14c

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -36

app.py CHANGED Viewed

@@ -6,24 +6,15 @@ model_id = "LiquidAI/LFM2-350M-Extract"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
-# Крок 1: знайти всі згадки товарів у тексті
-split_prompt = """Find all product orders in this text.
-Return ONLY a JSON array of strings, one per order mention.
-Example:
-Input: "We need 3 pens at $1.50 and also please send 2 notebooks for $4.99 each"
-Output: ["3 pens at $1.50", "2 notebooks for $4.99"]
-Return ONLY the JSON array."""
-# Крок 2: витягти поля з одного фрагменту
-extract_prompt = """Extract order info from this text fragment.
-Return ONLY this JSON, no extra fields:
-{"product": "<name>", "price": <number>, "quantity": <number>}
-Example:
-Input: "3 units of Blue Pen at $1.50 each"
-Output: {"product": "Blue Pen", "price": 1.50, "quantity": 3}"""
 def run_model(system, user, max_tokens=128):
     messages = [
@@ -49,27 +40,33 @@ def run_model(system, user, max_tokens=128):
     return re.sub(r'```json|```', '', response).strip()
 def extract_all(user_input):
-    # --- Крок 1: розбити текст на фрагменти ---
-    raw = run_model(split_prompt, user_input, max_tokens=256)
-    try:
-        fragments = json.loads(raw)
-        if not isinstance(fragments, list):
-            fragments = [user_input]  # fallback
-    except json.JSONDecodeError:
-        fragments = [user_input]      # fallback — весь текст як один
-    # --- Крок 2: обробити кожен фрагмент ---
-    results = []
-    for fragment in fragments:
-        raw_item = run_model(extract_prompt, fragment, max_tokens=64)
-        try:
-            item = json.loads(raw_item)
-            results.append(item)
-        except json.JSONDecodeError:
-            # логуємо що не розпарсилось
-            results.append({"error": "failed", "raw": fragment})
-    return json.dumps(results, indent=2, ensure_ascii=False)
 demo = gr.Interface(
     fn=extract_all,

 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+system_prompt = """Identify and extract information matching the following schema.
+Return data as a JSON object. Missing data should be omitted.
+Schema:
+- orders: list of objects:
+  - product: Product name
+  - price: Price as number without $ sign
+  - quantity: Number of items as integer
+"""
 def run_model(system, user, max_tokens=128):
     messages = [
     return re.sub(r'```json|```', '', response).strip()
 def extract_all(user_input):
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_input}
+    ]
+    inputs = tokenizer.apply_chat_template(
+        messages, return_tensors="pt", return_dict=True,
+        add_generation_prompt=True
+    ).to(model.device)
+    input_len = inputs["input_ids"].shape[1]
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=256,
+        temperature=0,
+        do_sample=False,
+        repetition_penalty=1.3
+    )
+    response = tokenizer.decode(
+        outputs[0][input_len:], skip_special_tokens=True
+    ).strip()
+    response = re.sub(r'```json|```', '', response).strip()
+    try:
+        parsed = json.loads(response)
+        return json.dumps(parsed, indent=2, ensure_ascii=False)
+    except json.JSONDecodeError:
+        return response
 demo = gr.Interface(
     fn=extract_all,