aifakepro committed on
Commit
79d2b6d
·
verified ·
1 Parent(s): 980b14c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -36
app.py CHANGED
@@ -6,24 +6,15 @@ model_id = "LiquidAI/LFM2-350M-Extract"
6
  tokenizer = AutoTokenizer.from_pretrained(model_id)
7
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
8
 
9
- # Крок 1: знайти всі згадки товарів у тексті
10
- split_prompt = """Find all product orders in this text.
11
- Return ONLY a JSON array of strings, one per order mention.
12
 
13
- Example:
14
- Input: "We need 3 pens at $1.50 and also please send 2 notebooks for $4.99 each"
15
- Output: ["3 pens at $1.50", "2 notebooks for $4.99"]
16
-
17
- Return ONLY the JSON array."""
18
-
19
- # Крок 2: витягти поля з одного фрагменту
20
- extract_prompt = """Extract order info from this text fragment.
21
- Return ONLY this JSON, no extra fields:
22
- {"product": "<name>", "price": <number>, "quantity": <number>}
23
-
24
- Example:
25
- Input: "3 units of Blue Pen at $1.50 each"
26
- Output: {"product": "Blue Pen", "price": 1.50, "quantity": 3}"""
27
 
28
  def run_model(system, user, max_tokens=128):
29
  messages = [
@@ -49,27 +40,33 @@ def run_model(system, user, max_tokens=128):
49
  return re.sub(r'```json|```', '', response).strip()
50
 
51
  def extract_all(user_input):
52
- # --- Крок 1: розбити текст на фрагменти ---
53
- raw = run_model(split_prompt, user_input, max_tokens=256)
54
- try:
55
- fragments = json.loads(raw)
56
- if not isinstance(fragments, list):
57
- fragments = [user_input] # fallback
58
- except json.JSONDecodeError:
59
- fragments = [user_input] # fallback — весь текст як один
 
60
 
61
- # --- Крок 2: обробити кожен фрагмент ---
62
- results = []
63
- for fragment in fragments:
64
- raw_item = run_model(extract_prompt, fragment, max_tokens=64)
65
- try:
66
- item = json.loads(raw_item)
67
- results.append(item)
68
- except json.JSONDecodeError:
69
- # логуємо що не розпарсилось
70
- results.append({"error": "failed", "raw": fragment})
 
71
 
72
- return json.dumps(results, indent=2, ensure_ascii=False)
 
 
 
 
73
 
74
  demo = gr.Interface(
75
  fn=extract_all,
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_id)
7
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
8
 
9
+ system_prompt = """Identify and extract information matching the following schema.
10
+ Return data as a JSON object. Missing data should be omitted.
 
11
 
12
+ Schema:
13
+ - orders: list of objects:
14
+ - product: Product name
15
+ - price: Price as number without $ sign
16
+ - quantity: Number of items as integer
17
+ """
 
 
 
 
 
 
 
 
18
 
19
  def run_model(system, user, max_tokens=128):
20
  messages = [
 
40
  return re.sub(r'```json|```', '', response).strip()
41
 
42
  def extract_all(user_input):
43
+ messages = [
44
+ {"role": "system", "content": system_prompt},
45
+ {"role": "user", "content": user_input}
46
+ ]
47
+ inputs = tokenizer.apply_chat_template(
48
+ messages, return_tensors="pt", return_dict=True,
49
+ add_generation_prompt=True
50
+ ).to(model.device)
51
+ input_len = inputs["input_ids"].shape[1]
52
 
53
+ outputs = model.generate(
54
+ **inputs,
55
+ max_new_tokens=256,
56
+ temperature=0,
57
+ do_sample=False,
58
+ repetition_penalty=1.3
59
+ )
60
+ response = tokenizer.decode(
61
+ outputs[0][input_len:], skip_special_tokens=True
62
+ ).strip()
63
+ response = re.sub(r'```json|```', '', response).strip()
64
 
65
+ try:
66
+ parsed = json.loads(response)
67
+ return json.dumps(parsed, indent=2, ensure_ascii=False)
68
+ except json.JSONDecodeError:
69
+ return response
70
 
71
  demo = gr.Interface(
72
  fn=extract_all,