Update app_strict_lora.py
app_strict_lora.py  +67 -66  CHANGED

Old version (removed lines marked with "-"):

@@ -1,55 +1,34 @@
-# app.py (
import gradio as gr
-from transformers import AutoTokenizer,
import torch
-import os
import re
-import json
import time
from datetime import datetime
-from huggingface_hub import model_info
-
-import os, shutil, glob
-
-def cleanup_space():
-    print("🧹 Cleaning up cache and checkpoints...")
-    paths = [
-        "/root/.cache/huggingface/hub",
-        "/root/.cache/torch",
-        "./qwen-gita-lora",
-        "./runs",
-        "./checkpoint*",
-        "./repo_tmp",
-        "./tmp",
-        "/tmp"
-    ]
-    for p in paths:
-        try:
-            if os.path.isdir(p):
-                shutil.rmtree(p)
-            elif os.path.exists(p):
-                os.remove(p)
-        except Exception as e:
-            print("⚠️ Skip cleanup for", p, e)
-
-cleanup_space()
-
-
-# ===== Settings =====
-device = 0 if torch.cuda.is_available() else -1
-lora_repo = "rahul7star/GPT-Diffuser-v1" # ONLY LoRA fine-tuned repo

log_lines = []

-
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)

-log(f"🚀 Loading LoRA-only model from {lora_repo}")
-log(f"Device: {'GPU' if device==0 else 'CPU'}")

-#
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    if tokenizer.pad_token is None:

@@ -59,9 +38,6 @@ except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None

-# ====== LoRA-only model ======
-model = None
-pipe = None
try:
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,

@@ -70,18 +46,21 @@ try:
        device_map="auto" if torch.cuda.is_available() else None,
    )
    model.eval()
-    log("✅ LoRA-only model loaded successfully")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
-    log("✅
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")

-#
def chat_with_model(message, history):
    log_lines.clear()
    log(f"💭 User message: {message}")

@@ -89,61 +68,83 @@ def chat_with_model(message, history):
    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    context = (
-
-
-
-
-
-
-

    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"

    log("📄 Built conversation context")
-    log(context)

    start_time = time.time()
    try:
-
            context,
-            max_new_tokens=
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
-
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

-    #
-    reply =
-    reply = re.sub(r"(
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

-
    history.append((message, reply))
    return "", history, "\n".join(log_lines)

-
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-    gr.Markdown("##

    with gr.Row():
        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(height=500)
-            msg = gr.Textbox(placeholder="Ask about
-
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

New version (added lines marked with "+"):

@@ -1,55 +1,34 @@
+# app.py — LoRA Chat Assistant (Diffusers-specialized)
import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re
import time
from datetime import datetime

+# ==========================================================
+# Configuration
+# ==========================================================
+lora_repo = "rahul7star/GPT-Diffuser-v1" # your fine-tuned LoRA model
+device = 0 if torch.cuda.is_available() else -1
log_lines = []

+
+# ==========================================================
+# Logging helper
+# ==========================================================
+def log(msg: str):
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)


+# ==========================================================
+# Model & Tokenizer Loading
+# ==========================================================
+log(f"🚀 Loading LoRA model from {lora_repo}")
+log(f"Device: {'GPU' if device == 0 else 'CPU'}")
+
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    if tokenizer.pad_token is None:

@@ -59,9 +38,6 @@ except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None

try:
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,

@@ -70,18 +46,21 @@ try:
        device_map="auto" if torch.cuda.is_available() else None,
    )
    model.eval()
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
+    log("✅ LoRA model & pipeline ready for inference")
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")
+    pipe = None
+

+# ==========================================================
+# Chat Function
+# ==========================================================
def chat_with_model(message, history):
    log_lines.clear()
    log(f"💭 User message: {message}")

@@ -89,61 +68,83 @@ def chat_with_model(message, history):
    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

+    # Context — restrict to the trained domain (Diffusers GitHub repo)
    context = (
+        "You are an expert coding assistant fine-tuned exclusively on the "
+        "Hugging Face Diffusers GitHub repository "
+        "(https://github.com/huggingface/diffusers.git). "
+        "Answer questions strictly based on that repository’s Python source code, "
+        "classes, functions, utilities, and docstrings. "
+        "If the answer cannot be found in the diffusers repo, respond with:\n"
+        "\"I don’t have enough information from the diffusers repository to answer that.\"\n\n"
+        "Conversation:\n"
+    )

+    # Build chat context
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"

    log("📄 Built conversation context")

+    # Generate
    start_time = time.time()
    try:
+        outputs = pipe(
            context,
+            max_new_tokens=512, # 🔹 extended token limit
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
+        elapsed = time.time() - start_time
+        log(f"⏱️ Inference took {elapsed:.2f}s")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

+    # Extract assistant reply
+    reply = outputs[len(context):].strip()
+    reply = re.sub(r"(<[^>]+>|[\r\n]{3,})", "\n", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

+    # Fallback if empty or nonsense
+    if not reply or len(reply) < 5:
+        reply = "I don’t have enough information from the diffusers repository to answer that."
+
+    # Format code blocks for Gradio UI
+    if re.search(r"```|class |def |import ", reply):
+        reply = f"```python\n{reply}\n```"
+
+    log(f"🪄 Model reply: {reply[:200]}...") # preview first 200 chars
    history.append((message, reply))
    return "", history, "\n".join(log_lines)

+
+# ==========================================================
+# Gradio Interface
+# ==========================================================
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+    gr.Markdown("## 🤖 Diffusers LoRA Chat — GitHub Code-Trained Assistant")

    with gr.Row():
        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(height=500, label="Chat with Diffusers LoRA")
+            msg = gr.Textbox(placeholder="Ask about Diffusers code...", label="Your Message")
+            send = gr.Button("💬 Ask")
+            clear = gr.Button("🧹 Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

+    send.click(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)

+
+# ==========================================================
+# Run App
+# ==========================================================
if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
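
For a quick check outside the Gradio UI, the updated chat function can be called directly. This is a minimal sketch, not part of the commit: it assumes the new file is saved as app_strict_lora.py, that the rahul7star/GPT-Diffuser-v1 weights can be downloaded (the tokenizer, model, and pipeline are loaded at import time), and the sample question is only an illustration.

# smoke_test.py: hypothetical local check of the updated chat function
import app_strict_lora as app  # importing triggers tokenizer/model/pipeline loading

history = []
# chat_with_model returns (cleared_textbox_value, updated_history, log_text)
_, history, logs = app.chat_with_model(
    "What does the StableDiffusionPipeline class do?", history
)
print(history[-1][1])  # the assistant's reply
print(logs)            # the timestamped log shown in the UI's log box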