File size: 3,709 Bytes
da321c2
59847fe
 
 
 
 
 
 
 
da321c2
59847fe
da321c2
59847fe
ea6cef5
59847fe
 
 
 
 
 
 
 
da321c2
 
59847fe
da321c2
59847fe
da321c2
59847fe
 
 
 
da321c2
59847fe
 
da321c2
 
 
59847fe
da321c2
 
59847fe
 
 
 
da321c2
 
 
 
 
 
 
 
 
59847fe
da321c2
59847fe
 
 
 
da321c2
59847fe
 
da321c2
59847fe
efb888a
59847fe
 
 
da321c2
 
59847fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da321c2
59847fe
 
 
 
 
da321c2
59847fe
 
 
da321c2
59847fe
da321c2
59847fe
 
 
 
da321c2
59847fe
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# app.py (LoRA-only loading)
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import model_info

# ===== Settings =====
# Device index used for the pipeline: 0 when CUDA is available (reported as
# "GPU" in the startup log), otherwise -1 (reported as "CPU").
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Hub repository that both the tokenizer and the model are loaded from.
lora_repo = "rahul7star/GPT-Diffuser-v1"  # ONLY LoRA fine-tuned repo

# Buffer of formatted log lines; its joined content is returned to the UI.
log_lines = []

def log(msg):
    """Print *msg* with an ``[HH:MM:SS]`` prefix and keep it in ``log_lines``.

    The same formatted line goes to stdout and to the module-level buffer.
    """
    stamp = datetime.now().strftime('%H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    log_lines.append(entry)

# Startup banner: which repo is being loaded and on which device class.
log(f"🚀 Loading LoRA-only model from {lora_repo}")
log(f"Device: {'GPU' if device == 0 else 'CPU'}")

# ====== Tokenizer ======
# Left as None when loading fails; the failure is logged instead of raised so
# the rest of the script can still start.
tokenizer = None
try:
    # NOTE: trust_remote_code=True allows Python code shipped in the Hub repo
    # to run locally; keep it only for repositories you trust.
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    # Reuse the EOS token for padding when the tokenizer defines no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None

# ====== LoRA-only model ======
# Both names stay None when loading fails; chat_with_model checks `pipe`.
model = None
pipe = None
try:
    # NOTE: trust_remote_code=True allows Python code shipped in the Hub repo
    # to run locally; keep it only for repositories you trust.
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,
        # Half precision on GPU, full precision on CPU.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    model.eval()
    log("✅ LoRA-only model loaded successfully")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # A model placed via device_map carries an `hf_device_map`; asking the
        # pipeline to move it again with `device` conflicts with that
        # placement, so only pass `device` for a model that was loaded plainly.
        device=None if getattr(model, "hf_device_map", None) else device,
    )
    log("✅ Pipeline ready for inference")
except Exception as e:
    # Covers both model loading and pipeline construction failures.
    log(f"❌ LoRA model load failed: {e}")

# ====== Chat Function ======
def _clean_reply(text):
    """Remove markup/noise from generated text and keep only the first turn."""
    text = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", text)
    text = re.sub(r"\s{2,}", " ", text).strip()
    # The model may continue the dialogue on its own; cut at the next marker.
    return text.split("User:")[0].split("Assistant:")[0].strip()


def chat_with_model(message, history):
    """Run one chat turn and return the values for the UI outputs.

    Args:
        message: Text the user submitted.
        history: Sequence of ``(user, bot)`` pairs from earlier turns. May be
            ``None`` (the Clear handler resets the chatbot to ``None``), which
            is treated as an empty history.

    Returns:
        A 3-tuple ``(textbox_value, history, log_text)``: an empty string to
        clear the input box, the history (with the new turn appended on
        success), and the log lines of this call joined by newlines.
    """
    log_lines.clear()
    log(f"💭 User message: {message}")

    # Without this guard a None history breaks both the loop and the append.
    if history is None:
        history = []

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    # Do not spend an inference call on a blank submission.
    if not message or not message.strip():
        log("⚠️ Empty message ignored")
        return "", history, "\n".join(log_lines)

    # NOTE(review): prompt text is kept verbatim (including the "souce" typo)
    # because it is fed to the model; the UI title advertises a Bhagavad Gita
    # assistant, so confirm the intended wording.
    parts = ["The following is a conversation between a user and an AI assistant trained on GIT souce code.\n"]
    parts.extend(f"User: {user}\nAssistant: {bot}\n" for user, bot in history)
    parts.append(f"User: {message}\nAssistant:")
    context = "".join(parts)

    log("📄 Built conversation context")
    log(context)

    start_time = time.perf_counter()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            # Ask for the continuation only, so the reply does not depend on
            # the prompt being echoed back character-for-character.
            return_full_text=False,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.perf_counter() - start_time:.2f}s")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # Still strip the prompt if a pipeline returns it anyway.
    if output.startswith(context):
        output = output[len(context):]

    # Clean reply
    reply = _clean_reply(output.strip())

    log(f"🪄 Model reply: {reply}")
    history.append((message, reply))
    return "", history, "\n".join(log_lines)

# ===== Gradio =====
# Two-column layout: chat widgets on the left, the log panel on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen LoRA-only — Bhagavad Gita Assistant")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    # chat_with_model returns (textbox value, history, log text), one value
    # per output component listed here.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # Return exactly one value per output component: an empty history for the
    # chatbot and an empty string for the log panel.
    clear.click(lambda: ([], ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)