iqasimz commited on
Commit
2f74bd4
·
verified ·
1 Parent(s): 716ad32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -0
app.py CHANGED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import json
4
+ import warnings
5
+ import re
6
+ import torch
7
+ import gradio as gr
8
+ import spaces
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
+ from transformers import StoppingCriteria, StoppingCriteriaList
11
+
12
# ---------- CONFIG ----------
# Default Gradio port; setdefault so an externally-provided port wins.
os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
MODEL_PATH = "iqasimz/g1"  # <- change to your repo or local dir
MAX_NEW_TOKENS_DEFAULT = 300   # UI default: generation length cap
TEMPERATURE_DEFAULT = 0.2      # UI default: sampling temperature (0 => greedy)
TOP_P_DEFAULT = 1.0            # UI default: nucleus-sampling cutoff (1.0 => disabled)
# ---------------------------

# Silence noisy torch warnings in the Space logs.
warnings.filterwarnings("ignore", module="torch")
# Process-wide cache: model_dir -> (tokenizer, model); each model is loaded once on CPU.
_model_cache = {}
22
+
23
+ def _ensure_pad_token(tokenizer):
24
+ if tokenizer.pad_token is None:
25
+ tokenizer.pad_token = tokenizer.eos_token
26
+ return tokenizer
27
+
28
def load_model_to_cpu(model_dir: str):
    """Load tokenizer+model once on CPU; moved to GPU per request via @spaces.GPU.

    Returns the cached ``(tokenizer, model)`` pair for *model_dir*, loading
    and caching it on first use.
    """
    cached = _model_cache.get(model_dir)
    if cached is not None:
        return cached

    tok = _ensure_pad_token(
        AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    )
    mdl = AutoModelForCausalLM.from_pretrained(
        model_dir,
        trust_remote_code=True,
        torch_dtype=torch.float16,  # fp16 weights; compute happens once moved to GPU
        device_map=None,            # keep on CPU so the cached copy can be re-moved per request
    )
    mdl.eval()  # inference only: disable dropout etc.
    _model_cache[model_dir] = (tok, mdl)
    print(f"[cache] Loaded {model_dir} on CPU")
    return tok, mdl
46
+
47
def build_inference_prompt(paragraph: str) -> str:
    """Wrap *paragraph* in the exact chat template used during fine-tuning.

    The Task + Rules block and the <|im_start|>/<|im_end|> markers must match
    the training data verbatim, so the strings below are reproduced exactly.
    """
    task_block = (
        "Task: You are an expert argument analyst. Number the sentences in the paragraph and tag the role of each one.\n"
        "Rules:\n"
        "- Do NOT change the text of any sentence.\n"
        "- Keep the original order.\n"
        "- Output exactly N lines, one per sentence.\n"
        '- Each line must be: "<index> <original sentence> <role>", where role ∈ {claim, premise, none}.\n'
        "- Do not add any explanations or extra text after the Nth line.\n"
    )
    user_turn = f"<|im_start|>user\n{task_block}\nParagraph:\n{paragraph}<|im_end|>\n"
    # Open the assistant turn so the model continues with the tagged lines.
    return user_turn + "<|im_start|>assistant\n"
62
+
63
# -------- Sentence counting for N --------
# Split after '.', '!' or '?' followed by whitespace.  The negative lookbehind
# skips single-letter initials such as "J." so "J. Smith" stays one sentence.
# (The previous lookbehind `(?<!\b[A-Z])` inspected the same character as
# `(?<=[.!?])` — which is always punctuation, never [A-Z] — so it never fired.)
SENT_SPLIT_RE = re.compile(r'(?<!\b[A-Z]\.)(?<=[.!?])\s+(?=\S)')

def count_sentences(paragraph: str) -> int:
    """Return the number of sentences in *paragraph*.

    Returns 0 for empty/None input; otherwise at least 1, even if the text
    contains no terminal punctuation.
    """
    p = (paragraph or "").strip()
    if not p:
        return 0
    parts = [s.strip() for s in SENT_SPLIT_RE.split(p) if s.strip()]
    return max(1, len(parts))
72
+
73
# -------- Stopping criteria to halt after N labeled lines --------
class RoleLinesStop(StoppingCriteria):
    """Halt generation once N role-tagged lines have been produced.

    A role-tagged line looks like ``<index> <original sentence> <role>`` with
    role in {claim, premise, none}.  Generation also stops the moment the
    model begins line N+1 (a leading "N+1 " marker), so nothing leaks past
    the Nth line.
    """

    def __init__(self, tokenizer, prompt_len: int, n_lines: int):
        self.tok = tokenizer
        self.prompt_len = prompt_len  # number of prompt tokens, skipped before decoding
        self.n_lines = n_lines        # target count of labeled lines
        # One complete labeled line, matched per line of the decoded text.
        self.role_line_re = re.compile(
            r'^\s*\d+\s+.+\s+(?:claim|premise|none)\s*$', re.IGNORECASE | re.MULTILINE
        )
        # Start-of-line marker for index N+1; disabled when N < 1.
        self.next_index_re = re.compile(rf'^\s*{n_lines + 1}\s', re.MULTILINE) if n_lines >= 1 else None

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        generated = input_ids[0, self.prompt_len:]
        if generated.numel() == 0:
            # Nothing generated yet, keep going.
            return False
        decoded = self.tok.decode(generated, skip_special_tokens=True)

        # The model has started line N+1 -> cut it off immediately.
        if self.next_index_re is not None and self.next_index_re.search(decoded):
            return True

        # Otherwise stop once N complete role-tagged lines exist.
        return len(self.role_line_re.findall(decoded)) >= self.n_lines
103
+
104
# Matches "<index> <sentence> <role>": index first, role is the final
# whitespace-separated token, sentence is everything in between.
_NUMBERED_LINE_RE = re.compile(r'^(\d+)\s+(.+?)\s+(\S+)\s*$')

def parse_numbered_lines(text: str):
    """Parse model output lines of the form ``<index> <sentence> <role>``.

    Example::

        1 Some sentence. claim
        2 Another sentence. premise

    Returns a list of ``{"index": int, "sentence": str, "role": str}`` dicts
    in input order, with the role lowercased.  Lines missing any of the three
    parts (e.g. a bare number, or a sentence with no trailing role token) are
    skipped.  The previous ``find``/``rfind`` slicing returned -1 for missing
    separators and silently produced corrupt entries in those cases.
    """
    results = []
    for raw in (text or "").splitlines():
        m = _NUMBERED_LINE_RE.match(raw.strip())
        if m:
            idx, sent, role = m.groups()
            results.append({"index": int(idx), "sentence": sent, "role": role.lower()})
    return results
129
+
130
@spaces.GPU(duration=120)
def analyze(paragraph: str, max_new_tokens: int, temperature: float, top_p: float, show_parsed: bool) -> tuple[str, str]:
    """Tag each sentence of *paragraph* with a role (claim/premise/none).

    Runs under ZeroGPU: the decorator grants a GPU for up to 120s per call,
    and the CPU-cached model is moved to CUDA inside the call.

    Returns a ``(raw_output, parsed_json)`` pair; ``parsed_json`` is "" when
    *show_parsed* is False.
    """
    paragraph = (paragraph or "").strip()
    if not paragraph:
        # Friendly message instead of running the model on empty input.
        return "Please paste a paragraph.", ""

    tokenizer, model = load_model_to_cpu(MODEL_PATH)
    model = model.to("cuda")

    prompt = build_inference_prompt(paragraph)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Compute target number of lines (N) and install stopping criteria
    n_lines = count_sentences(paragraph)
    stopper = RoleLinesStop(
        tokenizer=tokenizer,
        prompt_len=inputs["input_ids"].shape[1],
        n_lines=n_lines
    )
    stops = StoppingCriteriaList([stopper])

    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            do_sample=(float(temperature) > 0.0),  # sampling only if temp > 0; greedy otherwise
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
            stopping_criteria=stops,
        )

    # Keep special tokens so the assistant-turn markers below can be located.
    full = tokenizer.decode(output[0], skip_special_tokens=False)

    # Extract assistant segment
    if "<|im_start|>assistant\n" in full:
        resp = full.split("<|im_start|>assistant\n")[-1]
        resp = resp.split("<|im_end|>")[0].strip()
    else:
        # No chat markers found (unexpected) — fall back to the whole decode.
        resp = full.strip()

    # Safety net: hard-trim to exactly N labeled lines if model leaked extras
    role_line_re = re.compile(r'^\s*\d+\s+.+\s+(?:claim|premise|none)\s*$', re.IGNORECASE | re.MULTILINE)
    matched = role_line_re.findall(resp)
    if matched:
        trimmed = "\n".join(matched[:n_lines]).strip()
        if trimmed:
            resp = trimmed

    parsed = parse_numbered_lines(resp)
    parsed_json = json.dumps(parsed, ensure_ascii=False, indent=2) if show_parsed else ""
    return resp, parsed_json
184
+
185
def launch_app():
    """Build and return the Gradio Blocks UI for the argument-role tagger."""
    with gr.Blocks(title="Argument Role Tagger (DeepSeek 1.5B + LoRA merged)") as ui:
        gr.Markdown("## Argument Role Tagger")
        gr.Markdown(
            "Paste a paragraph. The model will number sentences and label each as **claim**, **premise**, or **none**."
        )

        with gr.Row():
            # Left column: input paragraph plus generation controls.
            with gr.Column(scale=2):
                paragraph_box = gr.Textbox(
                    label="Paragraph",
                    lines=10,
                    placeholder="Paste your paragraph…",
                    value=("Governments should subsidize solar panels to accelerate clean energy adoption. "
                           "Lowering installation costs would encourage more households to switch, reducing fossil fuel dependence. "
                           "In the long run, this shift could stabilize energy prices and reduce environmental damage.")
                )
                with gr.Row():
                    tokens_slider = gr.Slider(64, 1024, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
                with gr.Row():
                    temperature_slider = gr.Slider(0.0, 1.0, value=TEMPERATURE_DEFAULT, step=0.05, label="Temperature")
                    top_p_slider = gr.Slider(0.5, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
                parsed_toggle = gr.Checkbox(value=True, label="Show parsed JSON")
                analyze_btn = gr.Button("Analyze", variant="primary")

            # Right column: raw model output and the optional parsed JSON view.
            with gr.Column(scale=3):
                raw_box = gr.Textbox(label="Model Output (raw)", lines=18, show_copy_button=True)
                json_box = gr.Code(label="Parsed JSON", language="json")

        # Wire the button to the GPU-backed analyze() handler.
        analyze_btn.click(
            analyze,
            inputs=[paragraph_box, tokens_slider, temperature_slider, top_p_slider, parsed_toggle],
            outputs=[raw_box, json_box],
        )

        gr.Markdown("### Tips")
        gr.Markdown("- Set `MODEL_PATH` at the top to your merged model repo or local path.\n"
                    "- For deterministic outputs, set Temperature=0.0 and Top-p=1.0.\n"
                    "- Output is forcibly stopped after exactly N lines.")

    return ui
226
+
227
if __name__ == "__main__":
    # Build the UI and expose a public share link.
    launch_app().launch(share=True)