Update app_strict_lora.py

app_strict_lora.py  CHANGED  +43 -41

@@ -1,4 +1,3 @@
-# app.py — LoRA Chat Assistant (Diffusers-specialized)
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
@@ -9,9 +8,9 @@ from datetime import datetime
 # ==========================================================
 # Configuration
 # ==========================================================
+LORA_REPO = "rahul7star/GPT-Diffuser-v1"  # fine-tuned LoRA model (Diffusers-based)
+DEVICE = 0 if torch.cuda.is_available() else -1
+LOG_LINES = []


 # ==========================================================
@@ -20,17 +19,17 @@ log_lines = []
 def log(msg: str):
     line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
     print(line)
+    LOG_LINES.append(line)


 # ==========================================================
 # Model & Tokenizer Loading
 # ==========================================================
+log(f"🚀 Loading Diffusers LoRA model from {LORA_REPO}")
+log(f"Device: {'GPU' if DEVICE == 0 else 'CPU'}")

 try:
+    tokenizer = AutoTokenizer.from_pretrained(LORA_REPO, trust_remote_code=True)
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
     log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
@@ -40,21 +39,16 @@ except Exception as e:

 try:
     model = AutoModelForCausalLM.from_pretrained(
+        LORA_REPO,
         trust_remote_code=True,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         device_map="auto" if torch.cuda.is_available() else None,
     )
     model.eval()
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=DEVICE)
+    log("✅ LoRA model pipeline ready for inference")
 except Exception as e:
+    log(f"❌ Model pipeline load failed: {e}")
     pipe = None


@@ -62,79 +56,87 @@ except Exception as e:
 # Chat Function
 # ==========================================================
 def chat_with_model(message, history):
+    LOG_LINES.clear()
     log(f"💭 User message: {message}")

     if pipe is None:
         return "", history, "⚠️ Model pipeline not loaded."

+    # --- STRICT CONTEXT ENFORCEMENT ---
+    # Model can only use knowledge from diffusers GitHub repo
     context = (
+        "You are an AI assistant fine-tuned exclusively on the Hugging Face Diffusers "
+        "GitHub repository (https://github.com/huggingface/diffusers.git). "
+        "You must only answer questions using code, classes, functions, or documentation "
+        "found within that repository. "
+        "Do not reference any other frameworks, blogs, or tutorials. "
+        "If the answer cannot be found in the diffusers source code, respond with:\n\n"
         "\"I don’t have enough information from the diffusers repository to answer that.\"\n\n"
         "Conversation:\n"
     )

+    # Build conversation history
     for user, bot in history:
         context += f"User: {user}\nAssistant: {bot}\n"
     context += f"User: {message}\nAssistant:"

     log("📄 Built conversation context")

+    # --- Generation ---
     start_time = time.time()
     try:
         outputs = pipe(
             context,
+            max_new_tokens=512,  # extended token limit
             do_sample=True,
+            temperature=0.6,
             top_p=0.9,
+            repetition_penalty=1.15,
         )[0]["generated_text"]
         elapsed = time.time() - start_time
         log(f"⏱️ Inference took {elapsed:.2f}s")
     except Exception as e:
         log(f"❌ Generation failed: {e}")
+        return "", history, "\n".join(LOG_LINES)

+    # --- Clean response ---
     reply = outputs[len(context):].strip()
     reply = re.sub(r"(<[^>]+>|[\r\n]{3,})", "\n", reply)
     reply = re.sub(r"\s{2,}", " ", reply).strip()
     reply = reply.split("User:")[0].split("Assistant:")[0].strip()

+    # --- Guardrail: only use diffusers context ---
+    if (
+        not reply
+        or len(reply) < 5
+        or re.search(r"(Fluent|OpenAI|Stable|blog|Medium|notebook|paper)", reply, re.I)
+    ):
         reply = "I don’t have enough information from the diffusers repository to answer that."

+    # --- Markdown-friendly formatting ---
     if re.search(r"```|class |def |import ", reply):
         reply = f"```python\n{reply}\n```"

+    log(f"🪄 Model reply: {reply[:180]}...")  # preview short part
     history.append((message, reply))
+    return "", history, "\n".join(LOG_LINES)


 # ==========================================================
 # Gradio Interface
 # ==========================================================
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+    gr.Markdown("## 🤖 Diffusers GitHub-Trained LoRA Chat Assistant")

     with gr.Row():
         with gr.Column(scale=2):
+            chatbot = gr.Chatbot(height=480, label="Chat with Diffusers LoRA")
+            msg = gr.Textbox(
+                placeholder="Ask about Diffusers source code, classes, or examples...",
+                label="Your Message"
+            )
             send = gr.Button("💬 Ask")
+            clear = gr.Button("🧹 Clear Chat")
         with gr.Column(scale=1):
             log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)
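
The diff cuts off at line 142, before any handlers are attached to the buttons. A minimal sketch of the event wiring such an app typically needs — the send.click / msg.submit / clear.click calls below are an assumption for illustration, not code from this commit; they only rely on chat_with_model returning (cleared textbox value, updated history, log text) as defined above:

    # Hypothetical continuation inside `with gr.Blocks(...) as demo:` —
    # not shown in the diff; assumes the handler's return signature
    # (cleared textbox, updated history, log text) used above.
    send.click(chat_with_model, inputs=[msg, chatbot], outputs=[msg, chatbot, log_box])
    msg.submit(chat_with_model, inputs=[msg, chatbot], outputs=[msg, chatbot, log_box])
    # Clearing returns an empty history for the chatbot and an empty log string.
    clear.click(lambda: ([], ""), inputs=None, outputs=[chatbot, log_box])

demo.launch()

Two hedged implementation notes on the code as committed. First, when CUDA is available the model is loaded with device_map="auto", and transformers may warn or error (depending on version) when a device argument is also passed to pipeline(); omitting device=DEVICE in the GPU case avoids the conflict. Second, the prompt-echo slice outputs[len(context):] can be replaced by the text-generation pipeline's own return_full_text=False option, which returns only the newly generated text. Note also that the guardrail regex matches the literal word "Stable", so legitimate answers that mention StableDiffusionPipeline will be replaced by the fallback string.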