rahul7star commited on
Commit
eca54e8
Β·
verified Β·
1 Parent(s): ea6cef5

Create app_strict_lora.py

Browse files
Files changed (1) hide show
  1. app_strict_lora.py +115 -0
app_strict_lora.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py (LoRA-only loading)
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
4
+ import torch
5
+ import os
6
+ import re
7
+ import json
8
+ import time
9
+ from datetime import datetime
10
+ from huggingface_hub import model_info
11
+
12
# ===== Settings =====
# Pipeline device index: 0 = first GPU, -1 = CPU (transformers convention).
device = 0 if torch.cuda.is_available() else -1

# Hugging Face Hub repo holding the fine-tuned weights.
lora_repo = "rahul7star/GPT-Diffuser-v1"  # ONLY LoRA fine-tuned repo

# Rolling buffer of log lines surfaced in the UI's log panel.
log_lines = []


def log(msg):
    """Print *msg* with an HH:MM:SS timestamp and append it to log_lines."""
    stamped = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    log_lines.append(stamped)
22
+
23
log(f"πŸš€ Loading LoRA-only model from {lora_repo}")
log(f"Device: {'GPU' if device==0 else 'CPU'}")

# ====== Tokenizer ======
# Best-effort load: on failure we record the error and fall back to None so
# the app can still start (the chat handler reports the missing pipeline).
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    # Some causal-LM tokenizers ship without a pad token; reuse EOS for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"βœ… Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None
35
+
36
# ====== LoRA-only model ======
model = None
pipe = None
try:
    use_cuda = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,
        # fp16 only on GPU; CPU inference stays in fp32.
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        # Let accelerate place weights across available GPUs.
        device_map="auto" if use_cuda else None,
    )
    model.eval()  # inference mode: disables dropout etc.
    log("βœ… LoRA-only model loaded successfully")

    if tokenizer is None:
        # Without a tokenizer the pipeline cannot encode prompts; fail into
        # the except branch instead of crashing inside pipeline().
        raise RuntimeError("tokenizer unavailable; cannot build pipeline")

    # BUG FIX: when the model was dispatched with device_map="auto",
    # passing `device=` to pipeline() raises a ValueError in transformers
    # (an accelerate-dispatched model cannot be moved to a single device).
    # Only pin the device explicitly on the plain CPU path.
    pipe_kwargs = {} if use_cuda else {"device": device}
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **pipe_kwargs,
    )
    log("βœ… Pipeline ready for inference")
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")
57
+
58
# ====== Chat Function ======
def chat_with_model(message, history):
    """Generate a reply to *message* given the chat *history*.

    Returns (cleared_textbox, updated_history, log_text), matching the
    Gradio outputs [msg, chatbot, log_box]. History is a list of
    (user, assistant) tuples.
    """
    # Start a fresh per-request log so the panel shows only this turn.
    log_lines.clear()
    log(f"πŸ’­ User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    # BUG FIX: the Clear button resets the chatbot component to None, so the
    # next submit received history=None and the loop below crashed. Normalize
    # any falsy history to an empty list.
    history = history or []

    # Rebuild the full conversation as a plain-text prompt.
    # (typo fix in prompt: "souce" -> "source")
    context = "The following is a conversation between a user and an AI assistant trained on GIT source code.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"

    log("πŸ“„ Built conversation context")
    log(context)

    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f}s")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # The text-generation pipeline echoes the prompt; keep only the new tail.
    reply = output[len(context):].strip()
    # Strip stray markup/noise tokens and collapse runs of whitespace.
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    # Truncate at the first hallucinated next turn.
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

    log(f"πŸͺ„ Model reply: {reply}")
    history.append((message, reply))
    return "", history, "\n".join(log_lines)
98
+
99
# ===== Gradio =====
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # NOTE(review): the title says "Bhagavad Gita" while the model is
    # described above as trained on git source code — confirm intended branding.
    gr.Markdown("## πŸ’¬ Qwen LoRA-only β€” Bhagavad Gita Assistant")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    # Submit clears the textbox, updates the chat, and refreshes the log panel.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])

    # BUG FIX: the handler must return exactly one value per output component.
    # The original lambda returned (None, None, "") for only two outputs
    # [chatbot, log_box], which makes Gradio raise when Clear is clicked.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)