Update app.py
app.py
CHANGED
@@ -1,102 +1,108 @@
import torch
import gradio as gr
-from transformers import
from peft import PeftModel

-# -----
-LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs"  # Your AutoTrain LoRA repo
-    "You are a helpful banking and loan support assistant. "
-    "You answer short, clear, and factual responses about UTRs, EMIs, loan summaries, "
-    "payment issues, and basic loan help. If unsure, respond generically."
-)
-
-device = "cpu"
-
-# ---------------- LOAD TOKENIZER ---------------- #
-print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

-# ---------------- LOAD BASE MODEL ---------------- #
-print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
-    torch_dtype=torch.float32,
-    device_map=device,
-)
-
-# ---------------- LOAD LORA ADAPTER ---------------- #
-print(f"Loading LoRA adapter from {LORA_REPO} ...")
-model = PeftModel.from_pretrained(
-    base_model,
-    LORA_REPO,
)

model.eval()

-def chat_fn(message, history):
    """
    """

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
-            max_new_tokens=
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

-    #
-        reply = full_output.split("Assistant:")[-1].strip()
-    else:
-        reply = full_output.strip()

-    history
-    return history

-    ],
-)

if __name__ == "__main__":
    demo.launch()
import torch
import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

+# ----- CONFIG -----
+BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs"

+device = "cuda" if torch.cuda.is_available() else "cpu"

+# ----- LOAD TOKENIZER -----
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

+# Make sure we have a pad token
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id

+# ----- LOAD BASE MODEL -----
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
+base_model.to(device)

+# ----- LOAD LORA ADAPTER -----
+model = PeftModel.from_pretrained(base_model, LORA_REPO)
+model.to(device)
model.eval()

+# ----- HELPER: BUILD PROMPT FROM HISTORY -----
+def build_prompt(history, user_message: str) -> str:
    """
+    history: list of (user, assistant) pairs
+    user_message: latest user text
    """
+    chat = ""
+    if history is None:
+        history = []
+
+    # If your TinyLlama uses chat tokens like <|user|> / <|assistant|>,
+    # we format the conversation that way.
+    for user, assistant in history:
+        if not user and not assistant:
+            continue
+        chat += f"<|user|>\n{user}\n<|assistant|>\n{assistant}\n"
+
+    chat += f"<|user|>\n{user_message}\n<|assistant|>\n"
+    return chat
+
+# ----- CHAT FUNCTION (THIS IS WHAT GRADIO CALLS) -----
+def chat_fn(user_message, history):
+    if history is None:
+        history = []
+
+    prompt = build_prompt(history, user_message)
+
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        truncation=True,
+        max_length=2048,
+    ).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
+            max_new_tokens=256,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

+    # Only the newly generated tokens after the prompt
+    generated_ids = outputs[0][inputs["input_ids"].shape[-1] :]
+    answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

+    # Update history as list of (user, assistant)
+    history.append((user_message, answer))

+    # 🔴 IMPORTANT: return ONLY `history`, NOT `(history, history)` 🔴
+    return history

+# ----- GRADIO UI -----
+with gr.Blocks() as demo:
+    gr.Markdown("## TinyLlama + LoRA – Custom Chatbot")

+    chatbot = gr.Chatbot(
+        label="Chat",
+        type="tuples",  # list of (user, assistant) pairs
+        height=500,
+    )

+    msg = gr.Textbox(
+        label="Your message",
+        placeholder="Ask something...",
+    )
+
+    clear = gr.Button("Clear")
+
+    # On submit: send msg + current chatbot history into chat_fn
+    # and update ONLY the chatbot with the returned history
+    msg.submit(chat_fn, inputs=[msg, chatbot], outputs=[chatbot])
+    clear.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()
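
As a quick sanity check of the updated chat flow, here is a minimal sketch (not part of this commit) that exercises chat_fn directly, without launching the Gradio UI. It assumes the code above has already run in the same Python session, so model, tokenizer, build_prompt, and chat_fn all exist; the example question is made up.

# Sketch: drive chat_fn by hand instead of through the Gradio UI.
history = []
history = chat_fn("How do I check the status of my EMI payment?", history)
user_turn, assistant_turn = history[-1]  # chat_fn appends (user, assistant) and returns the history
print("User:", user_turn)
print("Assistant:", assistant_turn)

Because chat_fn returns only the updated history, the last (user, assistant) tuple holds the newest exchange, which is exactly what the Chatbot component renders.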
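If the Space only ever does inference, an optional follow-up (again, not part of this commit) is to fold the LoRA weights into the base model right after loading, which avoids the adapter indirection on every generate call. A sketch using PEFT's merge_and_unload():

# Sketch: merge the adapter into the base weights for inference-only use.
model = PeftModel.from_pretrained(base_model, LORA_REPO)
model = model.merge_and_unload()  # plain transformers model with the LoRA weights baked in
model.to(device)
model.eval()

build_prompt, chat_fn, and the Gradio wiring stay unchanged, since the merged model exposes the same generate() interface.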