Update app.py
app.py
CHANGED
@@ -3,19 +3,19 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 
-# --- Configuration ---
-# Your model repository ID
+# --- Configuration (Verified) ---
 BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+# Ensure this is correct for your model repository
 ADAPTER_MODEL_ID = "Vivek16/Root_Math-TinyLlama-CPU"
 
-# Define the instruction template components
-# …
-SYSTEM_INSTRUCTION = "You are a …
+# Define the instruction template components
+# NEW: General, helpful assistant instruction
+SYSTEM_INSTRUCTION = "You are a friendly and helpful assistant named Kutti. Your primary function is to solve problems and answer questions concisely. You should never mention being a math teacher or tutor."
 USER_TEMPLATE = "<|user|>\n{}</s>"
 ASSISTANT_TEMPLATE = "<|assistant|>\n{}</s>"
 
 
-# --- Model Loading Function ---
+# --- Model Loading Function (No change) ---
 def load_model():
     """Loads the base model and merges the LoRA adapters."""
     print("Loading base model...")
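A side note on the hand-rolled templates above: TinyLlama-1.1B-Chat ships a Zephyr-style chat template in its tokenizer, so the same prompt string can be produced without manual formatting. A minimal sketch, assuming transformers >= 4.34; the example message is made up:

messages = [
    {"role": "system", "content": SYSTEM_INSTRUCTION},
    {"role": "user", "content": "What is the square root of 144?"},  # hypothetical input
]
# tokenize=False returns the formatted string; add_generation_prompt=True
# appends the opening "<|assistant|>\n" so the model starts its turn.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# For this model the result matches the hand-built format:
# "<|system|>\n...</s>\n<|user|>\n...</s>\n<|assistant|>\n"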
@@ -37,25 +37,17 @@ def load_model():
     print("Model loaded and merged successfully!")
     return tokenizer, model
 
-# Load the model outside the prediction function for efficiency
 tokenizer, model = load_model()
 
 
-# --- Prediction Function …
+# --- Prediction Function (No functional change, just uses new SYSTEM_INSTRUCTION) ---
 def generate_response(message, history):
     """Generates a response using chat history and the fine-tuned model."""
 
-    # 1. Build the full prompt using the TinyLlama Chat template
-
     # Start with the system instruction
     full_prompt = f"<|system|>\n{SYSTEM_INSTRUCTION}</s>\n"
 
-    # …
-    # This teaches the model how to handle a simple non-math exchange by providing a pattern.
-    full_prompt += "<|user|>\nHello!</s>\n<|assistant|>\nHello! How can I assist you with a math problem today?</s>\n"
-    # -------------------------------------------------------------------------
-
-    # Append the actual chat history from the Gradio interface
+    # Append the chat history (if any)
     for user_msg, assistant_msg in history:
         full_prompt += USER_TEMPLATE.format(user_msg) + "\n"
         full_prompt += ASSISTANT_TEMPLATE.format(assistant_msg) + "\n"
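The body of load_model() is folded out of the diff between the first two hunks. For orientation, here is a minimal sketch of a loader matching the docstring, assuming CPU inference (the adapter repo name suggests CPU use); the dtype and structure are illustrative assumptions, not the commit's actual code:

def load_model():
    """Illustrative sketch only; the real body is collapsed in this diff."""
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
    # Load the base weights in full precision on CPU (assumption).
    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.float32)
    # Attach the LoRA adapter, then bake it into the base weights.
    model = PeftModel.from_pretrained(base, ADAPTER_MODEL_ID)
    model = model.merge_and_unload()
    model.eval()
    return tokenizer, model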
@@ -63,11 +55,9 @@ def generate_response(message, history):
     # Append the current user message and the start of the assistant's turn
     full_prompt += USER_TEMPLATE.format(message) + "\n"
     full_prompt += "<|assistant|>\n"
-
-    # 2. Tokenize the input
-    inputs = tokenizer(full_prompt, return_tensors="pt")
 
-    # …
+    # Tokenize and generate response
+    inputs = tokenizer(full_prompt, return_tensors="pt")
     with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
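The remaining arguments to model.generate are folded between this hunk and the next (only pad_token_id is visible below). A typical call for a small chat model looks like the sketch that follows; every sampling value is a hypothetical assumption, not what this commit uses:

with torch.no_grad():
    output_tokens = model.generate(
        **inputs,
        max_new_tokens=256,                   # cap reply length (hypothetical value)
        do_sample=True,                       # sample rather than decode greedily
        temperature=0.7,                      # hypothetical value
        top_p=0.9,                            # hypothetical value
        pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
    )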
@@ -78,13 +68,11 @@ def generate_response(message, history):
            pad_token_id=tokenizer.eos_token_id
        )
 
-    # 4. Decode the output and extract only the new response
     generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=False)
 
-    # …
+    # Extract only the model's new response
     response_start = generated_text.rfind('<|assistant|>')
     if response_start != -1:
-        # Get the text after <|assistant|> and strip the trailing </s>
         raw_response = generated_text[response_start + len('<|assistant|>'):].strip()
         assistant_response = raw_response.split('</s>')[0].strip()
     else:
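The extraction logic above can be checked in isolation. A self-contained example with a made-up decoded string:

generated_text = (
    "<|system|>\nYou are a friendly and helpful assistant named Kutti.</s>\n"
    "<|user|>\nWhat is 2 + 2?</s>\n"
    "<|assistant|>\n2 + 2 = 4</s>"
)
# rfind locates the final assistant turn; splitting on </s> drops trailing tokens.
response_start = generated_text.rfind('<|assistant|>')
raw_response = generated_text[response_start + len('<|assistant|>'):].strip()
print(raw_response.split('</s>')[0].strip())  # prints: 2 + 2 = 4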
@@ -93,17 +81,22 @@ def generate_response(message, history):
     return assistant_response
 
 
-# --- Gradio Chat Interface (…
-title = "…
-description = "…
+# --- Gradio Chat Interface (Changes to Title/Initial Message) ---
+title = "Kutti: Your TinyLlama Problem Solver"
+description = "Hello! I'm Kutti. How can I help you? Ask me anything from math problems to general questions."
 
 gr.ChatInterface(
     fn=generate_response,
-
-
-
+    chatbot=gr.Chatbot(
+        height=500,
+        # Initial greeting set here:
+        value=[(None, "Hello! I'm Kutti. How can I help you today?")]
+    ),
+    textbox=gr.Textbox(placeholder="Ask your question or problem here...", scale=7),
     title=title,
     description=description,
-    submit_btn="…
+    submit_btn="Send",  # Changed button text for a more conversational feel
+    clear_btn="Start New Chat",
+    undo_btn="Undo Last Message",
     theme="soft"
 ).queue().launch()
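One compatibility note: string-valued submit_btn, clear_btn, and undo_btn, and the tuple-style chatbot value, follow the Gradio 4.x ChatInterface API; later major releases changed or removed several of these keyword arguments. If the Space does not already pin its dependencies, something like the sketch below may be needed (the versions are illustrative guesses, not taken from this repository):

# requirements.txt (illustrative pins)
gradio==4.44.1
transformers>=4.36.0
peft>=0.7.0
torch>=2.1.0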