File size: 7,427 Bytes
eaf46f0
 
d6704b1
eaf46f0
7d67b35
7762f7f
7d67b35
 
 
88445a9
eaf46f0
 
 
3a02cb2
7d67b35
 
 
 
 
 
 
 
 
 
 
 
 
 
88445a9
 
 
 
 
 
 
 
 
 
 
 
 
 
7d67b35
d6704b1
7d67b35
d6704b1
88445a9
7d67b35
 
 
88445a9
7d67b35
d6704b1
7d67b35
d6704b1
7d67b35
 
 
 
88445a9
7d67b35
d6704b1
88445a9
 
d6704b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88445a9
d6704b1
 
c4a7223
88445a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eaf46f0
 
 
7d67b35
 
 
 
d6704b1
 
 
7d67b35
7762f7f
eaf46f0
7762f7f
 
7d67b35
 
7762f7f
d6704b1
7d67b35
 
d6704b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88445a9
7762f7f
 
 
 
eaf46f0
d6704b1
 
7d67b35
eaf46f0
7d67b35
eaf46f0
7762f7f
eaf46f0
 
88445a9
eaf46f0
7d67b35
 
 
 
 
 
 
eaf46f0
7d67b35
 
eaf46f0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# app.py
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import hf_hub_download, model_info

# ====== Load Model ======
# transformers `pipeline` device convention: 0 = first CUDA GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1
# Fine-tuned repo on the Hugging Face Hub (owner/name).
model_name = "rahul7star/Qwen2.5-3B-Instruct"

# Rolling buffer of log lines rendered in the UI's debug panel.
log_lines = []

def log(msg):
    """Print *msg* and record it with an HH:MM:SS timestamp."""
    stamped = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    log_lines.append(stamped)

# Emit startup diagnostics before any network access happens.
log("🔍 Initializing model load sequence...")
log(f"Using model: {model_name}")
log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")

# Default Hub cache location — NOTE(review): actual cache may differ if
# HF_HOME / HF_HUB_CACHE is set; this is informational only.
hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
log(f"Model cache directory: {hf_cache}")

# ====== Inspect Hugging Face repo ======
# Query the Hub API for repo metadata; purely informational, and any
# failure (offline, private repo, bad name) is logged rather than raised.
try:
    info = model_info(model_name)
    log("📦 Hugging Face model card info loaded:")
    log(f"  - Model ID: {info.id}")
    log(f"  - Private: {info.private}")
    log(f"  - Last modified: {info.last_modified}")
    log(f"  - Files count: {len(info.siblings)}")
    # Only the first few repo files, to keep the log panel short.
    for s in info.siblings[:5]:
        log(f"    · {s.rfilename}")
except Exception as e:
    log(f"⚠️ Could not fetch model card info: {e}")

# ====== Load Config ======
# Fetch the model configuration up front so its contents can be logged.
# A failure is non-fatal and leaves `config` as None.
config = None
try:
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Loaded model configuration:")
    log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
except Exception as e:
    log(f"⚠️ Could not read model config: {e}")
    config = None

# ====== Load Tokenizer ======
# Tokenizer failures are tolerated so the remaining diagnostics still run;
# `tokenizer` stays None on error.
tokenizer = None
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Tokenizer loaded successfully.")
    log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
except Exception as e:
    log(f"⚠️ Could not load tokenizer: {e}")
    tokenizer = None

# ====== Load Model ======
# Load the causal-LM weights and wrap them in a text-generation pipeline.
# On failure both stay None; chat_with_model checks `pipe is None` later.
model = None
pipe = None
try:
    start_load = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # fp16 halves GPU memory; CPU inference needs fp32.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        # device_map="auto" lets accelerate place/shard layers on GPUs.
        device_map="auto" if torch.cuda.is_available() else None,
    )
    # NOTE(review): passing device= here alongside device_map="auto" above can
    # conflict/warn in newer transformers versions — verify against the
    # installed transformers release.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
    log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.")
    log(f"📂 Actual model source: {model.name_or_path}")
    log(f"🧩 Architecture: {getattr(model.config, 'architectures', ['Unknown'])}")
except Exception as e:
    log(f"❌ Model failed to load: {e}")

# ====== Detect if custom fine-tune ======
# Verify which repository the weights actually resolved to, so a silent
# fallback to the base Qwen checkpoint is caught early.
# Fixes: the original computed `repo_base` but never used it (hard-coding
# "rahul7star" instead), and dereferenced `model.name_or_path` even when the
# load above failed and `model` is None, producing a misleading
# "verification failed: AttributeError" log line.
try:
    if model is None:
        log("⚠️ Skipping source verification: model was never loaded.")
    else:
        repo_owner = model_name.split("/")[0]  # e.g. "rahul7star"
        resolved = model.name_or_path
        if repo_owner in resolved:
            log("✅ Verified: Model files are correctly loaded from your custom repo.")
        elif "Qwen" in resolved:
            log("⚠️ Warning: The model resolved to the base model Qwen — your fine-tuned weights may be missing.")
            log("   → Check if 'pytorch_model.bin' or 'adapter_model.safetensors' exists in your repo.")
        else:
            log("ℹ️ Loaded from unknown source, verify repository structure manually.")
except Exception as e:
    log(f"⚠️ Source verification failed: {e}")

# ====== Try to extract dataset/training info ======
def extract_training_info(model_name):
    """Try to read training details (dataset, fine-tuning source) from model repo files."""
    found = {}

    # Scan the README for anything that looks like a dataset reference.
    try:
        readme_file = hf_hub_download(model_name, filename="README.md")
        with open(readme_file, "r", encoding="utf-8") as fh:
            text = fh.read()
            log("📖 Found README.md — scanning for dataset references...")
            hits = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", text, re.I)
            if hits:
                found["readme_mentions"] = hits[:5]
                log(f"✅ README mentions possible dataset: {hits[:5]}")
            else:
                log("ℹ️ No explicit dataset mention found in README.")
    except Exception as e:
        log(f"⚠️ No README.md found or could not read: {e}")

    # Fall back to config files that sometimes carry training metadata.
    for candidate in ("config.json", "adapter_config.json"):
        try:
            path = hf_hub_download(model_name, filename=candidate)
            with open(path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
                for key in ("dataset", "train_data", "base_model_name_or_path"):
                    if key in payload:
                        found[key] = payload[key]
                        log(f"✅ Found '{key}' in {candidate}: {payload[key]}")
        except Exception:
            # Missing files are expected; this lookup is best-effort only.
            pass

    if not found:
        log("⚠️ No training dataset info detected in model files.")
    return found

# Best-effort scan of the repo for dataset/fine-tuning provenance (logged only).
training_info = extract_training_info(model_name)

# ====== Chat Function ======
def chat_with_model(message, history):
    """Generate a reply for *message* given the tuple-pair chat *history*.

    Returns (cleared_textbox_value, updated_history, log_text) in the order
    the Gradio submit handler expects. The module-level log buffer is reset
    on each call so the log panel shows only this turn's trace.

    Fix: after the Clear button fires, Gradio passes history=None (the clear
    handler returns None for the chatbot), and iterating None raised
    TypeError — normalize it to an empty list first.
    """
    history = history or []

    log_lines.clear()
    log("💭 Starting chat generation process...")
    log(f"User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."

    # 1️⃣ Build conversation context from prior (user, bot) turns.
    context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"
    log("📄 Built conversation context:")
    log(context)

    # 2️⃣ Generate response
    log("🧠 Encoding input and generating response...")
    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            truncation=True,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # 3️⃣ Clean model output: drop the echoed prompt (generated_text includes
    # the input), strip markup/noise, collapse whitespace, and cut anything
    # past the first hallucinated speaker turn.
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

    log("🪄 Cleaned model output successfully.")
    log(f"Model reply: {reply}")

    history.append((message, reply))
    return "", history, "\n".join(log_lines)


# ====== Gradio Interface ======
# Two-column layout: chat transcript on the left, per-turn debug log on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen0.5-3B-Gita — Conversational Assistant with Detailed Debug Log")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita, life, or philosophy...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    # Enter in the textbox runs generation; outputs clear the textbox,
    # update the transcript, and refresh the log panel.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # Clear resets the transcript and the log panel (chatbot becomes None).
    clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)

# ====== Launch ======
if __name__ == "__main__":
    # Bind to all interfaces on 7860 — the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)