Spaces:

hin123123
/

eng_llm_gemma

Paused

App Files Files Community

hin123123 committed on Jan 24

Commit

97cc9f8

verified ·

1 Parent(s): 42f231d

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -15

app.py CHANGED Viewed

@@ -2,8 +2,10 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-MODEL_ID = "hin123123/gemma2-2b-it-slp-merged"  # merged repo from Colab
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_ID,
     use_fast=True,
@@ -24,17 +26,39 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 SYSTEM_PROMPT = (
-    "You are a speech-language pathology assistant. "
-    "You analyze child speech production errors and respond ONLY with JSON. "
-    "Use concise, accurate outputs."
 )
-def run(user_text, max_new_tokens=256, temperature=0.0, top_p=1.0, repetition_penalty=1.05):
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": user_text},
     ]
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
@@ -56,27 +80,31 @@ def run(user_text, max_new_tokens=256, temperature=0.0, top_p=1.0, repetition_pe
         )
     gen = out_ids[0, inputs["input_ids"].shape[1]:]
-    return tokenizer.decode(gen, skip_special_tokens=True).strip()
 demo = gr.Interface(
     fn=run,
     inputs=[
         gr.Textbox(
-            label="User text / Case JSON",
-            lines=4,
-            value=(
-                "Instructions: Classify or reflect the user's spoken attempt using Substitution, "
-                "Omission, or Addition, and respond with JSON only.\n\n"
-                'Case JSON: {"target": "mop", "ipa_target": "/mɑp/", "attempt": "mo", "ipa_attempt": "/mɑ/"}'
-            ),
         ),
         gr.Slider(8, 1024, 256, step=1, label="max_new_tokens"),
         gr.Slider(0, 1, 0.0, step=0.05, label="temperature"),
         gr.Slider(0.1, 1.0, 1.0, step=0.05, label="top_p"),
         gr.Slider(1.0, 1.5, 1.05, step=0.01, label="repetition_penalty"),
     ],
-    outputs=gr.Textbox(label="Model output"),
-    title="Gemma-2-2B-IT SLP JSON API (Merged)",
     api_name="run",
 )

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# ===== Model =====
+MODEL_ID = "hin123123/gemma2-2b-it-slp-merged"  # merged model you created
+# ===== Tokenizer & Model Load =====
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_ID,
     use_fast=True,
 )
 model.eval()
+# ===== Prompts =====
 SYSTEM_PROMPT = (
+    "You are an articulation/phonology error expert SLP assistant. "
+    "You only respond with valid JSON, never explanations."
 )
+TRAIN_INSTRUCTION = (
+    "Instructions: Classify or reflect the user's spoken attempt using Substitution, Omission, or Addition. "
+    "Include subtype and return JSON with keys: disorder, category, subtype, target, attempt, ipa_target, ipa_attempt, "
+    "correct_rate(pcc), severity_level, evidence, suggestions. "
+    "If present, also include task scaffolding keys: task_type, times_read, marks, daily_max_marks, task_label, "
+    "sentence_target, sentence_attempt. Return JSON only.\n"
+)
+def run(case_json: str,
+        max_new_tokens: int = 256,
+        temperature: float = 0.0,
+        top_p: float = 1.0,
+        repetition_penalty: float = 1.05):
+    case_json = case_json.strip()
+    if not case_json:
+        return "{}",  # empty JSON
+    # Build user text exactly like training
+    user_text = TRAIN_INSTRUCTION + "Case JSON:\n" + case_json
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": user_text},
     ]
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         )
     gen = out_ids[0, inputs["input_ids"].shape[1]:]
+    text = tokenizer.decode(gen, skip_special_tokens=True).strip()
+    return text
 demo = gr.Interface(
     fn=run,
     inputs=[
         gr.Textbox(
+            label="Case JSON",
+            lines=6,
+            value='{"target": "recording", "ipa_target": "/ɹəˈkɔɹdɪŋ/", "attempt": "wecording", "ipa_attempt": "/wəˈkɔɹdɪŋ/"}',
         ),
         gr.Slider(8, 1024, 256, step=1, label="max_new_tokens"),
         gr.Slider(0, 1, 0.0, step=0.05, label="temperature"),
         gr.Slider(0.1, 1.0, 1.0, step=0.05, label="top_p"),
         gr.Slider(1.0, 1.5, 1.05, step=0.01, label="repetition_penalty"),
     ],
+    outputs=gr.Textbox(label="Model output (JSON expected)"),
+    title="Gemma-2-2B-IT SLP JSON API (Merged, 283k dataset)",
+    description=(
+        "Paste a single case as JSON (target, attempt, ipa_target, ipa_attempt, etc.).\n"
+        "The model was fine-tuned to output JSON with keys: "
+        "disorder, category, subtype, target, attempt, ipa_target, ipa_attempt, "
+        "correct_rate(pcc), severity_level, evidence, suggestions, and optional task_* keys."
+    ),
     api_name="run",
 )