suneeldk committed on
Commit
0d6ff7a
·
verified ·
1 Parent(s): be027a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -13
app.py CHANGED
@@ -1,17 +1,17 @@
1
  import gradio as gr
2
  import json
 
3
  import spaces
4
  import torch
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
6
  from peft import PeftModel
7
 
8
  # ── Load model once at startup ──────────────────────────────
9
  BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
10
- LORA_MODEL = "suneeldk/json-extract"
11
 
12
  tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)
13
 
14
- # Load in 4-bit for faster inference
15
  bnb_config = BitsAndBytesConfig(
16
  load_in_4bit=True,
17
  bnb_4bit_quant_type="nf4",
@@ -25,10 +25,54 @@ base_model = AutoModelForCausalLM.from_pretrained(
25
  )
26
 
27
  model = PeftModel.from_pretrained(base_model, LORA_MODEL)
28
- model = model.merge_and_unload() # Merge LoRA into base β€” removes adapter overhead
29
  model.eval()
30
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # ── Auto-detect schema from text ────────────────────────────
33
  def auto_schema(text):
34
  text_lower = text.lower()
@@ -60,8 +104,8 @@ def auto_schema(text):
60
  schema["item"] = "string|null"
61
  schema["quantity"] = "string|null"
62
 
63
- location_keywords = ["from", "to", "at", "in", "store", "shop", "restaurant",
64
- "station", "airport", "hotel", "office", "train", "flight", "bus"]
65
  if any(k in text_lower for k in location_keywords):
66
  schema["location"] = "string|null"
67
 
@@ -107,23 +151,30 @@ def extract(text, custom_schema):
107
  schema_str = json.dumps(schema)
108
  prompt = f"### Input: {text}\n### Schema: {schema_str}\n### Output:"
109
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
 
 
 
 
 
110
 
111
  with torch.no_grad():
112
  outputs = model.generate(
113
  **inputs,
114
- max_new_tokens=128, # JSON output is short, no need for 512
115
- do_sample=False, # Greedy decoding β€” faster than sampling
116
  pad_token_id=tokenizer.eos_token_id,
 
117
  )
118
 
119
- # Decode only the new tokens, skip the prompt
120
- new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
121
  output_part = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
122
 
123
- try:
124
- parsed = json.loads(output_part)
 
125
  return json.dumps(parsed, indent=2, ensure_ascii=False), json.dumps(schema, indent=2)
126
- except json.JSONDecodeError:
127
  return output_part, json.dumps(schema, indent=2)
128
 
129
 
 
1
  import gradio as gr
2
  import json
3
+ import re
4
  import spaces
5
  import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
7
  from peft import PeftModel
8
 
9
  # ── Load model once at startup ──────────────────────────────
10
  BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
11
+ LORA_MODEL = "suneeldk/json-extract" # ← change this
12
 
13
  tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)
14
 
 
15
  bnb_config = BitsAndBytesConfig(
16
  load_in_4bit=True,
17
  bnb_4bit_quant_type="nf4",
 
25
  )
26
 
27
  model = PeftModel.from_pretrained(base_model, LORA_MODEL)
28
+ model = model.merge_and_unload()
29
  model.eval()
30
 
31
 
32
# ── Stop generation when JSON is complete ───────────────────
class StopOnJsonComplete(StoppingCriteria):
    """Halt generation as soon as the newly generated text is a balanced JSON object.

    Decodes only the tokens produced after the prompt on every step and
    returns True once the brace depth of a text that starts with '{'
    returns to zero.
    """

    def __init__(self, tokenizer, prompt_length):
        # prompt_length: token count of the prompt, used to slice off the
        # prompt portion of input_ids before decoding.
        self.tokenizer = tokenizer
        self.prompt_length = prompt_length

    def __call__(self, input_ids, scores, **kwargs):
        generated = input_ids[0][self.prompt_length:]
        decoded = self.tokenizer.decode(generated, skip_special_tokens=True).strip()
        # Only consider stopping once the output has begun a JSON object.
        if not decoded.startswith("{"):
            return False
        depth = 0
        for ch in decoded:
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    # Braces balanced — the JSON object is complete.
                    return True
        return False
54
+
55
+
56
# ── Extract first valid JSON from text ──────────────────────
def extract_json(text):
    """Return the first complete JSON object embedded in *text*, or None.

    Tries ``json.JSONDecoder.raw_decode`` at every ``{`` in the input and
    returns the first successfully parsed object. Unlike naive brace
    counting, this correctly handles braces inside JSON string values
    (e.g. ``{"s": "}"}``) and is not thrown off by stray unbalanced
    braces in the surrounding text (e.g. ``}{"a": 1}``).

    Parameters:
        text: arbitrary model output that may contain a JSON object.

    Returns:
        The parsed Python object (a dict, since parsing starts at '{'),
        or None when no valid JSON object is found.
    """
    decoder = json.JSONDecoder()
    for i, char in enumerate(text):
        if char != "{":
            continue
        try:
            # raw_decode parses one JSON value starting at index i and
            # ignores any trailing garbage after it.
            obj, _end = decoder.raw_decode(text, i)
        except json.JSONDecodeError:
            # Not a valid object at this position — keep scanning.
            continue
        return obj
    return None
74
+
75
+
76
  # ── Auto-detect schema from text ────────────────────────────
77
  def auto_schema(text):
78
  text_lower = text.lower()
 
104
  schema["item"] = "string|null"
105
  schema["quantity"] = "string|null"
106
 
107
+ location_keywords = ["store", "shop", "restaurant", "station", "airport",
108
+ "hotel", "office"]
109
  if any(k in text_lower for k in location_keywords):
110
  schema["location"] = "string|null"
111
 
 
151
  schema_str = json.dumps(schema)
152
  prompt = f"### Input: {text}\n### Schema: {schema_str}\n### Output:"
153
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
154
+ prompt_length = inputs["input_ids"].shape[1]
155
+
156
+ # Stop as soon as JSON is complete
157
+ stop_criteria = StoppingCriteriaList([
158
+ StopOnJsonComplete(tokenizer, prompt_length)
159
+ ])
160
 
161
  with torch.no_grad():
162
  outputs = model.generate(
163
  **inputs,
164
+ max_new_tokens=128,
165
+ do_sample=False,
166
  pad_token_id=tokenizer.eos_token_id,
167
+ stopping_criteria=stop_criteria,
168
  )
169
 
170
+ new_tokens = outputs[0][prompt_length:]
 
171
  output_part = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
172
 
173
+ # Extract just the JSON, ignore any trailing garbage
174
+ parsed = extract_json(output_part)
175
+ if parsed:
176
  return json.dumps(parsed, indent=2, ensure_ascii=False), json.dumps(schema, indent=2)
177
+ else:
178
  return output_part, json.dumps(schema, indent=2)
179
 
180