# app.py
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import hf_hub_download, model_info
# ====== Load Model ======
device = 0 if torch.cuda.is_available() else -1
model_name = "rahul7star/Qwen2.5-3B-Instruct"

log_lines = []

def log(msg):
    """Append a timestamped message to the log."""
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)

log("🔍 Initializing model load sequence...")
log(f"Using model: {model_name}")
log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")

hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
log(f"Model cache directory: {hf_cache}")
# ====== Inspect Hugging Face repo ======
try:
    info = model_info(model_name)
    log("📦 Hugging Face model card info loaded:")
    log(f"  - Model ID: {info.id}")
    log(f"  - Private: {info.private}")
    log(f"  - Last modified: {info.last_modified}")
    log(f"  - Files count: {len(info.siblings)}")
    for s in info.siblings[:5]:
        log(f"    · {s.rfilename}")
except Exception as e:
    log(f"⚠️ Could not fetch model card info: {e}")
# ====== Load Config ======
try:
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Loaded model configuration:")
    log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
except Exception as e:
    log(f"⚠️ Could not read model config: {e}")
    config = None
# ====== Load Tokenizer ======
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Tokenizer loaded successfully.")
    log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
except Exception as e:
    log(f"⚠️ Could not load tokenizer: {e}")
    tokenizer = None
# ====== Load Model ======
model = None
pipe = None

try:
    start_load = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # When the model was loaded with device_map="auto", accelerate has already
        # placed it on a device and passing `device` again can raise an error,
        # so only pass an explicit device on the CPU path.
        device=None if torch.cuda.is_available() else device,
    )
| log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.") | |
| log(f"📂 Actual model source: {model.name_or_path}") | |
| log(f"🧩 Architecture: {getattr(model.config, 'architectures', ['Unknown'])}") | |
| except Exception as e: | |
| log(f"❌ Model failed to load: {e}") | |
# ====== Detect if custom fine-tune ======
try:
    repo_base = model_name.split("/")[0]
    if model is None:
        log("⚠️ Skipping source verification: model was not loaded.")
    elif repo_base in model.name_or_path:
        log("✅ Verified: Model files are correctly loaded from your custom repo.")
    elif "Qwen" in model.name_or_path:
        log("⚠️ Warning: The model resolved to the base model Qwen — your fine-tuned weights may be missing.")
        log("   → Check if 'pytorch_model.bin' or 'adapter_model.safetensors' exists in your repo.")
    else:
        log("ℹ️ Loaded from an unknown source; verify the repository structure manually.")
except Exception as e:
    log(f"⚠️ Source verification failed: {e}")
# ====== Try to extract dataset/training info ======
def extract_training_info(model_name):
    """Try to read training details (dataset, fine-tuning source) from model repo files."""
    data = {}
    try:
        # Try README
        readme_path = hf_hub_download(model_name, filename="README.md")
        with open(readme_path, "r", encoding="utf-8") as f:
            readme_text = f.read()
        log("📖 Found README.md — scanning for dataset references...")
        matches = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", readme_text, re.I)
        if matches:
            data["readme_mentions"] = matches[:5]
            log(f"✅ README mentions possible dataset: {matches[:5]}")
        else:
            log("ℹ️ No explicit dataset mention found in README.")
    except Exception as e:
        log(f"⚠️ No README.md found or could not read: {e}")

    # Try config.json or adapter_config.json
    for fname in ["config.json", "adapter_config.json"]:
        try:
            fpath = hf_hub_download(model_name, filename=fname)
            with open(fpath, "r", encoding="utf-8") as f:
                content = json.load(f)
            for k in ["dataset", "train_data", "base_model_name_or_path"]:
                if k in content:
                    data[k] = content[k]
                    log(f"✅ Found '{k}' in {fname}: {content[k]}")
        except Exception:
            pass

    if not data:
        log("⚠️ No training dataset info detected in model files.")
    return data

training_info = extract_training_info(model_name)
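# Optional (assumption): surface whatever training info was found so it appears in the
# startup log alongside the other initialization messages.
if training_info:
    log(f"🗂️ Training info collected: {training_info}")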
# ====== Chat Function ======
def chat_with_model(message, history):
    log_lines.clear()
    log("💭 Starting chat generation process...")
    log(f"User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."

    # 1️⃣ Build conversation context
    context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"
    log("📄 Built conversation context:")
    log(context)

    # 2️⃣ Generate response
    log("🧠 Encoding input and generating response...")
    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            truncation=True,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # 3️⃣ Clean model output
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()
    log("🪄 Cleaned model output successfully.")
    log(f"Model reply: {reply}")

    history.append((message, reply))
    return "", history, "\n".join(log_lines)
# ====== Gradio Interface ======
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen2.5-3B-Gita — Conversational Assistant with Detailed Debug Log")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita, life, or philosophy...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # The clear handler must return one value per output component (chatbot and
    # log_box), so it returns a 2-tuple rather than three values.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)
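    # Note: chat_with_model returns "" as its first output, which clears the message
    # textbox after each submission; its last output refreshes the debug log panel.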
# ====== Launch ======
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)