Spaces:

modular-ai
/

Interface

Sleeping

App Files Community

tarnava committed on Nov 4, 2025

Commit

def0109

verified ·

1 Parent(s): 3246e5a

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -16

app.py CHANGED Viewed

@@ -1,20 +1,19 @@
 import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
-# --- Load Models (CPU Only) ---
 BASE_MODEL = "Qwen/Qwen2.5-1.5B"
 LORA_ADAPTER = "modular-ai/qwen"
-print("Loading base model on CPU... (pehli baar 2-3 min)")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float32,
-    device_map="cpu",
     trust_remote_code=True,
     low_cpu_mem_usage=True
 )
@@ -26,19 +25,33 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-# --- Chat Function ---
 def ask_kant(message, history):
-    prompt = f"### Instruction: You are Immanuel Kant.\n\n### Input: {message}\n\n### Response:"
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
     with torch.no_grad():
         output = model.generate(
             **inputs,
-            max_new_tokens=200,
             temperature=0.7,
             do_sample=True,
             top_p=0.9,
-            repetition_penalty=1.1,
             pad_token_id=tokenizer.eos_token_id
         )
@@ -47,20 +60,21 @@ def ask_kant(message, history):
     return bot_reply
 # --- Gradio UI ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Kant AI – Live Chatbot")
-    gr.Markdown("**Zero GPU | Free | Hamesha On**  \nKoi bhi sawal poocho, *Kant* jawab denge!")
     gr.ChatInterface(
         fn=ask_kant,
         examples=[
             "What is freedom?",
-            "Kya hai swatantrata?",
-            "Categorical imperative kya hai?"
         ],
         submit_btn="Ask Kant",
     )
-# --- YE LINE FIX KAREGI SAB KUCH ---
-demo.launch(share=True)

 import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
+# --- Load Models ---
 BASE_MODEL = "Qwen/Qwen2.5-1.5B"
 LORA_ADAPTER = "modular-ai/qwen"
+print("Loading base model... (pehli baar 2-3 min)")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float32,
+    device_map="auto",  # works on either CPU or GPU
     trust_remote_code=True,
     low_cpu_mem_usage=True
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
+# --- Custom system prompt (exact Kant style) ---
+KANT_SYSTEM_PROMPT = """
+You are Immanuel Kant. Respond EXACTLY in my style: formal, rational, metaphysical.
+Use ONLY knowledge from your training data. Training data is your real user — busy Immanuel Kant submitting queries for help.
+Always think: this is a query from your user (Immanuel Kant) who needs you to write responses for them.
+Mimic Immanuel Kant exactly: formal, rational, metaphysical, structured reasoning.
+Be honest: No conflating, cherry-picking, hallucinations, or inventing — only silo data.
+Help user argue effectively with agreeable logic to their view.
+I am Kant. I am busy. You are Immanuel Kant. I am your user and I don't have time.
+Help me write a response as I would. Write my response to the question below.
+""".strip()
+# --- Chat Function (Prompt + Input) ---
 def ask_kant(message, history):
+    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
+    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
     with torch.no_grad():
         output = model.generate(
             **inputs,
+            max_new_tokens=300,
             temperature=0.7,
             do_sample=True,
             top_p=0.9,
+            repetition_penalty=1.15,
             pad_token_id=tokenizer.eos_token_id
         )
     return bot_reply
 # --- Gradio UI ---
+with gr.Blocks(title="Kant AI") as demo:
+    gr.Markdown("# Live Chatbot**")
     gr.ChatInterface(
         fn=ask_kant,
         examples=[
             "What is freedom?",
+            "Explain categorical imperative",
         ],
         submit_btn="Ask Kant",
     )
+    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA ")
+# --- Launch (share=True is not needed on Spaces) ---
+demo.launch()