nitya001 committed on
Commit
a2ebcc6
·
verified ·
1 Parent(s): 20c6d29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
 
 
4
 
5
  # ---------------- CONFIG ---------------- #
6
 
7
  BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Base model
8
- LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs" # Your fine-tuned LoRA repo
9
 
10
- # System prompt for behavior shaping
11
  SYSTEM_PROMPT = (
12
  "You are a helpful banking and loan support assistant. "
13
  "You answer short, clear, and factual responses about UTRs, EMIs, loan summaries, "
14
  "payment issues, and basic loan help. If unsure, respond generically."
15
  )
16
 
 
 
17
 
18
  # ---------------- LOAD TOKENIZER ---------------- #
19
 
@@ -24,34 +26,38 @@ if tokenizer.pad_token is None:
24
  tokenizer.pad_token = tokenizer.eos_token
25
 
26
 
27
- # ---------------- LOAD MODEL + LORA ---------------- #
28
-
29
- print("Loading base model + LoRA...")
30
 
31
- model = AutoModelForCausalLM.from_pretrained(
 
32
  BASE_MODEL,
33
  torch_dtype=torch.float32,
34
- device_map="cpu", # CPU runtime
35
- adapter_id=LORA_REPO, # <--- THE MAGIC LINE
 
 
 
 
 
 
 
36
  )
37
 
38
  model.eval()
39
- device = "cpu"
40
 
41
 
42
  # ---------------- CHAT FUNCTION ---------------- #
43
 
44
  def chat_fn(message, history):
45
  """
 
46
  history: list of [user, bot]
47
  """
48
 
49
- # Build conversation with system prompt
50
  conversation = f"System: {SYSTEM_PROMPT}\n"
51
-
52
  for user_msg, bot_msg in history:
53
  conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
54
-
55
  conversation += f"User: {message}\nAssistant:"
56
 
57
  inputs = tokenizer(conversation, return_tensors="pt").to(device)
@@ -68,7 +74,7 @@ def chat_fn(message, history):
68
 
69
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
70
 
71
- # Extract only latest answer
72
  if "Assistant:" in full_output:
73
  reply = full_output.split("Assistant:")[-1].strip()
74
  else:
@@ -82,13 +88,13 @@ def chat_fn(message, history):
82
 
83
  demo = gr.ChatInterface(
84
  fn=chat_fn,
85
- title="💬 TinyLoan Assistant (TinyLlama + LoRA)",
86
  description="Ask about UTR, loan summaries, EMIs, transactions, or payment issues.",
87
  examples=[
88
  "What is my latest UTR?",
89
- "Generate my loan summary",
90
- "Show my recent transactions",
91
- "My payment is stuck, what to do?"
92
  ],
93
  )
94
 
 
1
  import torch
 
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from peft import PeftModel
5
 
6
  # ---------------- CONFIG ---------------- #
7
 
8
  BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Base model
9
+ LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs" # Your AutoTrain LoRA repo
10
 
 
11
  SYSTEM_PROMPT = (
12
  "You are a helpful banking and loan support assistant. "
13
  "You answer short, clear, and factual responses about UTRs, EMIs, loan summaries, "
14
  "payment issues, and basic loan help. If unsure, respond generically."
15
  )
16
 
17
+ device = "cpu"
18
+
19
 
20
  # ---------------- LOAD TOKENIZER ---------------- #
21
 
 
26
  tokenizer.pad_token = tokenizer.eos_token
27
 
28
 
29
+ # ---------------- LOAD BASE MODEL ---------------- #
 
 
30
 
31
+ print("Loading base model...")
32
+ base_model = AutoModelForCausalLM.from_pretrained(
33
  BASE_MODEL,
34
  torch_dtype=torch.float32,
35
+ device_map=device,
36
+ )
37
+
38
+ # ---------------- LOAD LORA ADAPTER ---------------- #
39
+
40
+ print(f"Loading LoRA adapter from {LORA_REPO} ...")
41
+ model = PeftModel.from_pretrained(
42
+ base_model,
43
+ LORA_REPO,
44
  )
45
 
46
  model.eval()
 
47
 
48
 
49
  # ---------------- CHAT FUNCTION ---------------- #
50
 
51
  def chat_fn(message, history):
52
  """
53
+ Gradio ChatInterface callback.
54
  history: list of [user, bot]
55
  """
56
 
57
+ # Build conversation text
58
  conversation = f"System: {SYSTEM_PROMPT}\n"
 
59
  for user_msg, bot_msg in history:
60
  conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
 
61
  conversation += f"User: {message}\nAssistant:"
62
 
63
  inputs = tokenizer(conversation, return_tensors="pt").to(device)
 
74
 
75
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
76
 
77
+ # Extract only the latest answer after the last "Assistant:"
78
  if "Assistant:" in full_output:
79
  reply = full_output.split("Assistant:")[-1].strip()
80
  else:
 
88
 
89
  demo = gr.ChatInterface(
90
  fn=chat_fn,
91
+ title="💬 TinyLoan Assistant (TinyLlama + AutoTrain LoRA)",
92
  description="Ask about UTR, loan summaries, EMIs, transactions, or payment issues.",
93
  examples=[
94
  "What is my latest UTR?",
95
+ "Generate my loan summary.",
96
+ "Show my transactions.",
97
+ "My payment is stuck, what should I do?",
98
  ],
99
  )
100