Update app.py
app.py
CHANGED
@@ -1,6 +1,6 @@
 # app.py
 import gradio as gr
-from transformers import
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
 import torch
 import os
 import re
@@ -24,34 +24,46 @@ log("🔍 Initializing model load sequence...")
 log(f"Using model: {model_name}")
 log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")
 
-# Inspect model folder (once downloaded from HF cache)
 hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
 log(f"Model will be loaded from local cache directory: {hf_cache}")
 
+# ====== Load Config ======
 try:
-    config = AutoConfig.from_pretrained(model_name)
+    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
     log("✅ Loaded configuration file:")
     log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
 except Exception as e:
     log(f"⚠️ Could not read model config: {e}")
 
+# ====== Load Tokenizer ======
 try:
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     log("✅ Tokenizer loaded successfully.")
     log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
-    log(f"Tokenizer files found in: {tokenizer.pretrained_vocab_files_map}")
 except Exception as e:
     log(f"⚠️ Could not load tokenizer: {e}")
 
-# Load
-
-
-
-
-
-)
-
-
+# ====== Load Model ======
+try:
+    start_load = time.time()
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        trust_remote_code=True,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto" if torch.cuda.is_available() else None,
+    )
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device=device,
+    )
+    log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.")
+    log(f"📂 Actual model source: {model.name_or_path}")
+    log(f"🧩 Architecture: {model.config.architectures if hasattr(model.config, 'architectures') else 'Unknown'}")
+except Exception as e:
+    log(f"❌ Model failed to load: {e}")
+    pipe = None
 
 
 # ====== Chat Function ======
@@ -60,6 +72,9 @@ def chat_with_model(message, history):
     log("💭 Starting chat generation process...")
     log(f"User message: {message}")
 
+    if pipe is None:
+        return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."
+
     # 1️⃣ Build conversation context
     context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
     for user, bot in history:
@@ -68,42 +83,45 @@ def chat_with_model(message, history):
     log("📄 Built conversation context:")
     log(context)
 
-    # 2️⃣
+    # 2️⃣ Generate response
    log("🧠 Encoding input and generating response...")
     start_time = time.time()
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        output = pipe(
+            context,
+            max_new_tokens=200,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            truncation=True,
+        )[0]["generated_text"]
+        log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
+    except Exception as e:
+        log(f"❌ Generation failed: {e}")
+        return "", history, "\n".join(log_lines)
+
+    # 3️⃣ Extract and clean model reply
     reply = output[len(context):].strip()
     reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
     reply = re.sub(r"\s{2,}", " ", reply).strip()
     reply = reply.split("User:")[0].split("Assistant:")[0].strip()
 
-    log("🪄
-    log(f"Model reply
+    log("🪄 Cleaned model output successfully.")
+    log(f"Model reply: {reply}")
 
-    # 4️⃣ Log
+    # 4️⃣ Log model folder files
     try:
-        model_dir =
-        log(f"
+        model_dir = model.name_or_path
+        log(f"📁 Model files read from: {model_dir}")
         if os.path.exists(model_dir):
             for root, dirs, files in os.walk(model_dir):
-            for file in files[:5]:
+                for file in files[:5]:
                     log(f" - {os.path.join(root, file)}")
                 break
     except Exception as e:
         log(f"⚠️ Could not list model folder files: {e}")
 
-    # 5️⃣ Finalize
     history.append((message, reply))
     return "", history, "\n".join(log_lines)
 
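The changed hunks call log(...) and join log_lines, and use time and json, without defining or importing them; those pieces live in the unchanged part of app.py. A minimal sketch of what the logging helper plausibly looks like, with the names and timestamp format as assumptions:

import time

log_lines = []  # assumed module-level buffer; the app returns "\n".join(log_lines)

def log(msg):
    # Hypothetical helper: timestamp the message, buffer it for the UI
    # textbox, and echo it to the Space console.
    line = f"[{time.strftime('%H:%M:%S')}] {msg}"
    log_lines.append(line)
    print(line)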
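One caveat on the GPU path, offered as an assumption about transformers behavior rather than something this commit exercises: with device_map="auto", accelerate has already placed the model's weights, and some transformers versions reject an additional explicit device= argument to pipeline(...). A defensive sketch that only passes device= on CPU, reusing the model, tokenizer, and device objects from the diff above:

import torch
from transformers import pipeline

# Hypothetical guard: skip device= when accelerate already placed the model.
pipe_kwargs = {} if torch.cuda.is_available() else {"device": device}
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **pipe_kwargs)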
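The reply-extraction step slices the echoed prompt off with output[len(context):]. Assuming a transformers version whose text-generation pipeline supports the return_full_text flag, the same effect is available without the manual slice; a sketch using the diff's pipe and context:

result = pipe(
    context,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    truncation=True,
    return_full_text=False,  # "generated_text" then excludes the prompt
)
reply = result[0]["generated_text"].strip()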
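chat_with_model returns three values (a cleared input string, the updated history, and the joined log text), which implies a Blocks layout with a message textbox, a tuple-style chatbot, and a logs textbox. The actual UI sits outside the changed hunks, so this wiring is a hypothetical reconstruction:

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # history as (user, bot) tuples, matching the loop above
    logs = gr.Textbox(label="Logs", lines=12)
    msg = gr.Textbox(label="Your message")
    # Outputs map back to (msg, chatbot, logs) in the function's return order.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, logs])

demo.launch()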