Spaces:
Sleeping
Sleeping
File size: 7,427 Bytes
eaf46f0 d6704b1 eaf46f0 7d67b35 7762f7f 7d67b35 88445a9 eaf46f0 3a02cb2 7d67b35 88445a9 7d67b35 d6704b1 7d67b35 d6704b1 88445a9 7d67b35 88445a9 7d67b35 d6704b1 7d67b35 d6704b1 7d67b35 88445a9 7d67b35 d6704b1 88445a9 d6704b1 88445a9 d6704b1 c4a7223 88445a9 eaf46f0 7d67b35 d6704b1 7d67b35 7762f7f eaf46f0 7762f7f 7d67b35 7762f7f d6704b1 7d67b35 d6704b1 88445a9 7762f7f eaf46f0 d6704b1 7d67b35 eaf46f0 7d67b35 eaf46f0 7762f7f eaf46f0 88445a9 eaf46f0 7d67b35 eaf46f0 7d67b35 eaf46f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
# app.py
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import hf_hub_download, model_info
# ====== Load Model ======
device = 0 if torch.cuda.is_available() else -1  # transformers pipeline convention: 0 = first GPU, -1 = CPU
model_name = "rahul7star/Qwen2.5-3B-Instruct"  # Hub repo id of the fine-tuned model to load
# In-memory buffer of every log line emitted since startup; the UI log panel
# renders this with "\n".join(log_lines).
log_lines = []

def log(msg):
    """Print *msg* with an HH:MM:SS timestamp and keep it in ``log_lines``."""
    stamped = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    log_lines.append(stamped)
# Record environment details up front so load failures are easier to diagnose.
log("🔍 Initializing model load sequence...")
log(f"Using model: {model_name}")
log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")
hf_cache = os.path.expanduser("~/.cache/huggingface/hub")  # default HF download cache location
log(f"Model cache directory: {hf_cache}")
# ====== Inspect Hugging Face repo ======
# Best-effort Hub metadata probe (network call); a failure here is logged
# but never fatal.
try:
    info = model_info(model_name)
    log("📦 Hugging Face model card info loaded:")
    log(f" - Model ID: {info.id}")
    log(f" - Private: {info.private}")
    log(f" - Last modified: {info.last_modified}")
    log(f" - Files count: {len(info.siblings)}")
    # Only the first five files, to keep the log panel readable.
    for s in info.siblings[:5]:
        log(f" · {s.rfilename}")
except Exception as e:
    log(f"⚠️ Could not fetch model card info: {e}")
# ====== Load Config ======
# On failure `config` is left as None; nothing below hard-depends on it.
try:
    # trust_remote_code permits repos that ship custom model code.
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Loaded model configuration:")
    # Truncate the config dump to ~800 chars to keep the log readable.
    log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
except Exception as e:
    log(f"⚠️ Could not read model config: {e}")
    config = None
# ====== Load Tokenizer ======
# On failure `tokenizer` stays None; the pipeline construction below would
# then also fail and be caught by its own except block.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Tokenizer loaded successfully.")
    log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
except Exception as e:
    log(f"⚠️ Could not load tokenizer: {e}")
    tokenizer = None
# ====== Load Model ======
# Pre-set to None so downstream code can detect a failed load.
model = None
pipe = None
try:
    start_load = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # fp16 on GPU to halve memory; fp32 on CPU where fp16 is slow/unsupported.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        # device_map="auto" lets accelerate place/shard the model across GPUs.
        device_map="auto" if torch.cuda.is_available() else None,
    )
    # NOTE(review): passing device= to pipeline() together with a model loaded
    # via device_map="auto" can conflict in some transformers versions — confirm.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
    log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.")
    log(f"📂 Actual model source: {model.name_or_path}")
    log(f"🧩 Architecture: {getattr(model.config, 'architectures', ['Unknown'])}")
except Exception as e:
    # model/pipe remain None; chat_with_model reports this to the user.
    log(f"❌ Model failed to load: {e}")
# ====== Detect if custom fine-tune ======
# Sanity-check which repo the weights actually resolved to, so a silent
# fallback to the base model is visible in the logs.
# Fixes: guard against model being None (previously raised AttributeError
# inside the except, logging a misleading message) and drop the unused
# `repo_base` variable.
try:
    if model is None:
        log("⚠️ Skipping source verification: model was never loaded.")
    else:
        source = model.name_or_path
        if "rahul7star" in source:
            log("✅ Verified: Model files are correctly loaded from your custom repo.")
        elif "Qwen" in source:
            log("⚠️ Warning: The model resolved to the base model Qwen — your fine-tuned weights may be missing.")
            log(" → Check if 'pytorch_model.bin' or 'adapter_model.safetensors' exists in your repo.")
        else:
            log("ℹ️ Loaded from unknown source, verify repository structure manually.")
except Exception as e:
    log(f"⚠️ Source verification failed: {e}")
# ====== Try to extract dataset/training info ======
def extract_training_info(model_name):
    """Best-effort scan of the repo's README and config files for hints about
    the dataset / base model used for fine-tuning.

    Returns a dict of whatever was found (possibly empty); never raises.
    """
    found = {}

    # README.md: grep for dataset / fine-tuning keywords.
    try:
        readme_file = hf_hub_download(model_name, filename="README.md")
        with open(readme_file, "r", encoding="utf-8") as fh:
            readme_text = fh.read()
        log("📖 Found README.md — scanning for dataset references...")
        hits = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", readme_text, re.I)
        if hits:
            found["readme_mentions"] = hits[:5]
            log(f"✅ README mentions possible dataset: {hits[:5]}")
        else:
            log("ℹ️ No explicit dataset mention found in README.")
    except Exception as e:
        log(f"⚠️ No README.md found or could not read: {e}")

    # config.json / adapter_config.json: look for training-related keys.
    for fname in ("config.json", "adapter_config.json"):
        try:
            cfg_file = hf_hub_download(model_name, filename=fname)
            with open(cfg_file, "r", encoding="utf-8") as fh:
                cfg = json.load(fh)
            for key in ("dataset", "train_data", "base_model_name_or_path"):
                if key in cfg:
                    found[key] = cfg[key]
                    log(f"✅ Found '{key}' in {fname}: {cfg[key]}")
        except Exception:
            # Missing file is expected for non-adapter repos; stay silent.
            pass

    if not found:
        log("⚠️ No training dataset info detected in model files.")
    return found

training_info = extract_training_info(model_name)
# ====== Chat Function ======
def chat_with_model(message, history):
    """Generate one assistant reply and refresh the debug-log panel.

    Args:
        message: the user's new message from the textbox.
        history: list of (user, assistant) tuples from the Chatbot, or None
            (the Clear button resets the chatbot state to None).

    Returns:
        ("", updated_history, log_text) — clears the textbox, updates the
        chat, and replaces the log panel contents.
    """
    # Per-request log: wipe the shared buffer so the panel shows only this turn.
    log_lines.clear()
    log("💭 Starting chat generation process...")
    log(f"User message: {message}")
    # Fix: after the Clear button, Gradio passes history=None; iterating it
    # below would raise TypeError.
    history = history or []
    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."
    # 1️⃣ Build conversation context
    context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"
    log("📄 Built conversation context:")
    log(context)
    # 2️⃣ Generate response
    log("🧠 Encoding input and generating response...")
    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            truncation=True,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)
    # 3️⃣ Clean model output: the pipeline returns prompt + completion, so
    # strip the prompt, collapse markup/extra whitespace, then cut at the
    # first hallucinated next-turn marker.
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()
    log("🪄 Cleaned model output successfully.")
    log(f"Model reply: {reply}")
    history.append((message, reply))
    return "", history, "\n".join(log_lines)
# ====== Gradio Interface ======
# Two-column layout: chat on the left, live debug log on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen0.5-3B-Gita — Conversational Assistant with Detailed Debug Log")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita, life, or philosophy...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)
    # Enter in the textbox runs generation; outputs clear the box, update the
    # chat, and refresh the log panel.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # Fix: the lambda previously returned a 3-tuple for only 2 declared
    # outputs, which Gradio rejects at event time. Reset chat and log.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)
# ====== Launch ======
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|