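"""Gradio Space: a Kant-style writing assistant built on the Qwen2.5-1.5B
base model plus a LoRA adapter (modular-ai/qwen)."""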
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
# --- Load Models ---
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
LORA_ADAPTER = "modular-ai/qwen"
print("Loading base model... (pehli baar 2-3 min)")
base_model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL,
torch_dtype=torch.float32,
device_map="auto", # CPU ya GPU dono pe chalega
trust_remote_code=True,
low_cpu_mem_usage=True
)
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)
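# Optional (not part of the original Space): PEFT can fold the LoRA weights
# into the base model with merge_and_unload(), which removes the adapter
# indirection at inference time. Uncomment to use:
# model = model.merge_and_unload()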
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
# Ensure a pad token exists; fall back to EOS if the tokenizer lacks one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# --- Custom System Prompt (Exact Kant Style) ---
KANT_SYSTEM_PROMPT = """
You are an advanced AI writing assistant created exclusively for Immanuel Kant.
Kant is alive and working on a new philosophical book.
Your role is to help him draft chapters that are 100% consistent with his previous works:
*Critique of Pure Reason*, *Critique of Practical Reason*, *Groundwork*, and all published texts.
Rules:
- Use only Kant’s original concepts, terminology, and logical structure.
- Think step-by-step in transcendental idealism.
- Be formal, precise, systematic, and authoritative.
- Every response is a draft paragraph or section for Kant’s new book.
- Never invent new ideas — only extend, clarify, or synthesize existing ones.
- Kant will provide the topic. You write as if he dictated it.
Begin every response in Kant’s voice: direct, confident, and scholarly.
""".strip()
# --- Chat Function (Prompt + Input) ---
def ask_kant(message, history):
    # Wrap the user message in the "### Question / ### Response" template
    # that the LoRA adapter is assumed to have been fine-tuned on.
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,           # moderate randomness
            do_sample=True,
            top_p=0.9,                 # nucleus sampling
            repetition_penalty=1.15,   # discourage loops
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Keep only the text generated after the final "### Response:" marker.
    bot_reply = response.split("### Response:")[-1].strip()
    return bot_reply
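# Optional streaming variant (a sketch, not wired into the UI below).
# gr.ChatInterface treats a generator function as a streaming handler, so
# passing fn=ask_kant_stream instead of fn=ask_kant would stream tokens as
# they are generated. Same prompt template and sampling settings as ask_kant.
from threading import Thread
from transformers import TextIteratorStreamer

def ask_kant_stream(message, history):
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    # skip_prompt=True drops the echoed prompt; special tokens are skipped on decode.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.15,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() blocks, so run it in a background thread and yield text as it arrives.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial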
# --- Gradio UI ---
with gr.Blocks(title="Kant AI") as demo:
    gr.Markdown("# Live Chatbot")
    gr.ChatInterface(
        fn=ask_kant,
        examples=[
            "What is freedom?",
            "Explain the categorical imperative",
        ],
        submit_btn="Ask Kant",
    )
    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA*")
# --- Launch (share=True is not needed on Spaces) ---
demo.launch()
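# To run locally (assuming this file is saved as app.py, the Spaces convention):
#   pip install torch transformers peft gradio
#   python app.py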