Update app1.py
app1.py CHANGED
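This update rewrites app1.py to load the fine-tuned model directly instead of assembling it from a base model plus a PEFT adapter. The old version imported peft, attached the LoRA adapter rahul7star/Qwen2.5-3B-Gita to a separately loaded base model, and also logged Hub repo metadata and scanned the README for dataset mentions; the new version drops all of that and loads rahul7star/Qwen2.5-3B-Gita with AutoModelForCausalLM alone, keeping the same Gradio chat UI and log panel.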
Old version of app1.py (lines removed by this commit are prefixed with "-"; several removed lines were cut off in the capture and are left as captured):

@@ -1,129 +1,76 @@
- # app.py
  import gradio as gr
  from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
- from peft import PeftModel
  import torch
  import os
  import re
  import json
  import time
  from datetime import datetime
- from huggingface_hub import

- #
  device = 0 if torch.cuda.is_available() else -1
-
- finetuned_repo = "rahul7star/Qwen2.5-3B-Gita"

  log_lines = []

  def log(msg):
-     """Append timestamped message to log."""
      line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
      print(line)
      log_lines.append(line)

-
- log("
- log(f"Base model: {base_model_name}")
- log(f"Fine-tuned LoRA repo: {finetuned_repo}")
- log(f"Device detected: {'GPU' if device==0 else 'CPU'}")
- hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
- log(f"Model cache directory: {hf_cache}")

- # ======
  try:
-
-     log("📦 Hugging Face repo info loaded:")
-     log(f" - Model ID: {info.id}")
-     log(f" - Private: {info.private}")
-     log(f" - Last modified: {info.last_modified}")
-     log(f" - Files count: {len(info.siblings)}")
-     for s in info.siblings[:5]:
-         log(f" · {s.rfilename}")
- except Exception as e:
-     log(f"⚠️ Could not fetch model info: {e}")
-
- # ====== Load base model and tokenizer ======
- try:
-     tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token
      log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
  except Exception as e:
-     log(f"❌
      tokenizer = None

  try:
-
-
          trust_remote_code=True,
          torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
          device_map="auto" if torch.cuda.is_available() else None,
      )
-
-
-
-
-
-
-
-
-
-     if base_model is not None:
-         model = PeftModel.from_pretrained(base_model, finetuned_repo)
-         model.eval()
-         log(f"✅ LoRA fine-tuned model loaded from {finetuned_repo}")
-         log(f"🧩 Model architecture: {getattr(model.config, 'architectures', ['Unknown'])}")
-
-         pipe = pipeline(
-             "text-generation",
-             model=model,
-             tokenizer=tokenizer,
-             device=device,
-         )
-         log("✅ Pipeline ready for inference")
  except Exception as e:
-     log(f"❌
-
- # ====== Try to extract training info ======
- def extract_training_info(repo_name):
-     data = {}
-     try:
-         readme_path = hf_hub_download(repo_name, filename="README.md")
-         with open(readme_path, "r", encoding="utf-8") as f:
-             text = f.read()
-         matches = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", text, re.I)
-         if matches:
-             data["readme_mentions"] = matches[:5]
-             log(f"✅ README mentions dataset/fine-tune: {matches[:5]}")
-         else:
-             log("ℹ️ No dataset reference found in README")
-     except Exception as e:
-         log(f"⚠️ README not found or unreadable: {e}")
-     return data
-
- training_info = extract_training_info(finetuned_repo)

  # ====== Chat Function ======
  def chat_with_model(message, history):
      log_lines.clear()
-     log("💭
-     log(f"User message: {message}")

      if pipe is None:
-         return "", history, "⚠️ Model pipeline not loaded.

-
-     context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
      for user, bot in history:
          context += f"User: {user}\nAssistant: {bot}\n"
      context += f"User: {message}\nAssistant:"
-
      log(context)

-     # Generate
-     log("🧠 Generating response...")
      start_time = time.time()
      try:
          output = pipe(

@@ -139,26 +86,24 @@ def chat_with_model(message, history):
          log(f"❌ Generation failed: {e}")
          return "", history, "\n".join(log_lines)

-     # Clean
      reply = output[len(context):].strip()
      reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
      reply = re.sub(r"\s{2,}", " ", reply).strip()
      reply = reply.split("User:")[0].split("Assistant:")[0].strip()

-     log("🪄
-     log(f"Model reply: {reply}")
-
      history.append((message, reply))
      return "", history, "\n".join(log_lines)

- #
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-     gr.Markdown("## 💬 Qwen

      with gr.Row():
          with gr.Column(scale=2):
              chatbot = gr.Chatbot(height=500)
-             msg = gr.Textbox(placeholder="Ask about the Gita
              clear = gr.Button("Clear")
          with gr.Column(scale=1):
              log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

@@ -166,6 +111,5 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
      msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
      clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)

- # ====== Launch ======
  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
New version of app1.py (lines added by this commit are prefixed with "+"):

@@ -1,129 +1,76 @@
+ # app.py (LoRA-only loading)
  import gradio as gr
  from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
  import torch
  import os
  import re
  import json
  import time
  from datetime import datetime
+ from huggingface_hub import model_info

+ # ===== Settings =====
  device = 0 if torch.cuda.is_available() else -1
+ lora_repo = "rahul7star/Qwen2.5-3B-Gita"  # ONLY LoRA fine-tuned repo

  log_lines = []

  def log(msg):
      line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
      print(line)
      log_lines.append(line)

+ log(f"🚀 Loading LoRA-only model from {lora_repo}")
+ log(f"Device: {'GPU' if device==0 else 'CPU'}")

+ # ====== Tokenizer ======
  try:
+     tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token
      log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
  except Exception as e:
+     log(f"❌ Tokenizer load failed: {e}")
      tokenizer = None

+ # ====== LoRA-only model ======
+ model = None
+ pipe = None
  try:
+     model = AutoModelForCausalLM.from_pretrained(
+         lora_repo,
          trust_remote_code=True,
          torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
          device_map="auto" if torch.cuda.is_available() else None,
      )
+     model.eval()
+     log("✅ LoRA-only model loaded successfully")
+     pipe = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         device=device,
+     )
+     log("✅ Pipeline ready for inference")
  except Exception as e:
+     log(f"❌ LoRA model load failed: {e}")
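Whether this direct load works depends on what the repo actually contains: if rahul7star/Qwen2.5-3B-Gita holds full (merged) model weights it loads like any other checkpoint, while an adapter-only repo relies on transformers' PEFT integration (peft installed) or on merging the adapter into its base model first. A minimal merge sketch, assuming the base model is Qwen/Qwen2.5-3B-Instruct (the base is not stated in this diff) and using an illustrative local output directory:

# One-off merge sketch (assumptions: base model name and output directory are illustrative)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct", torch_dtype=torch.float16)
merged = PeftModel.from_pretrained(base, "rahul7star/Qwen2.5-3B-Gita").merge_and_unload()
merged.save_pretrained("Qwen2.5-3B-Gita-merged")  # full weights, loads without peft
AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct").save_pretrained("Qwen2.5-3B-Gita-merged")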

  # ====== Chat Function ======
  def chat_with_model(message, history):
      log_lines.clear()
+     log(f"💭 User message: {message}")

      if pipe is None:
+         return "", history, "⚠️ Model pipeline not loaded."

+     context = "The following is a conversation between a user and an AI assistant trained on Bhagavad Gita excerpts.\n"
      for user, bot in history:
          context += f"User: {user}\nAssistant: {bot}\n"
      context += f"User: {message}\nAssistant:"
+
+     log("📄 Built conversation context")
      log(context)

      start_time = time.time()
      try:
          output = pipe(

@@ -139,26 +86,24 @@ def chat_with_model(message, history):
          log(f"❌ Generation failed: {e}")
          return "", history, "\n".join(log_lines)

+     # Clean reply
      reply = output[len(context):].strip()
      reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
      reply = re.sub(r"\s{2,}", " ", reply).strip()
      reply = reply.split("User:")[0].split("Assistant:")[0].strip()

+     log(f"🪄 Model reply: {reply}")
      history.append((message, reply))
      return "", history, "\n".join(log_lines)
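For reference, chat_with_model turns the running history into a plain transcript prompt and then recovers the reply by slicing the prompt off the generated text; for one prior turn the prompt looks like this (the question and answer text below are made up for illustration):

history = [("Who speaks the Bhagavad Gita?", "Krishna, while counselling Arjuna.")]
message = "What is karma yoga?"
# context built by the loop above:
# The following is a conversation between a user and an AI assistant trained on Bhagavad Gita excerpts.
# User: Who speaks the Bhagavad Gita?
# Assistant: Krishna, while counselling Arjuna.
# User: What is karma yoga?
# Assistant: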

+ # ===== Gradio =====
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+     gr.Markdown("## 💬 Qwen LoRA-only — Bhagavad Gita Assistant")

      with gr.Row():
          with gr.Column(scale=2):
              chatbot = gr.Chatbot(height=500)
+             msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
              clear = gr.Button("Clear")
          with gr.Column(scale=1):
              log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

@@ -166,6 +111,5 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
      msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
      clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)

  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
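To try the updated app outside the Space, running python app1.py starts the same Gradio UI; demo.launch binds to 0.0.0.0 on port 7860, so it is reachable at http://localhost:7860.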