rahul7star commited on
Commit
eca54e8
Β·
verified Β·
1 Parent(s): ea6cef5

Create app_strict_lora.py

Browse files
Files changed (1) hide show
  1. app_strict_lora.py +115 -0
app_strict_lora.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py (LoRA-only loading)
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
4
+ import torch
5
+ import os
6
+ import re
7
+ import json
8
+ import time
9
+ from datetime import datetime
10
+ from huggingface_hub import model_info
11
+
12
# ===== Settings =====
# Pipeline device index: 0 = first GPU, -1 = CPU (transformers convention).
device = 0 if torch.cuda.is_available() else -1

# Hugging Face Hub repo holding the fine-tuned weights.
lora_repo = "rahul7star/GPT-Diffuser-v1"  # ONLY LoRA fine-tuned repo

# Rolling buffer of log lines surfaced in the UI's log panel.
log_lines = []


def log(msg):
    """Print *msg* with an HH:MM:SS timestamp and append it to log_lines."""
    stamped = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    log_lines.append(stamped)
22
+
23
log(f"πŸš€ Loading LoRA-only model from {lora_repo}")
log(f"Device: {'GPU' if device==0 else 'CPU'}")

# ====== Tokenizer ======
# Best-effort load: on failure we record the error and fall back to None so
# the app can still start (the chat handler reports the missing pipeline).
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    # Some causal-LM tokenizers ship without a pad token; reuse EOS for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"βœ… Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None
35
+
36
# ====== LoRA-only model ======
model = None
pipe = None
try:
    use_cuda = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,
        # fp16 only on GPU; CPU inference stays in fp32.
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        # Let accelerate place weights across available GPUs.
        device_map="auto" if use_cuda else None,
    )
    model.eval()  # inference mode: disables dropout etc.
    log("βœ… LoRA-only model loaded successfully")

    if tokenizer is None:
        # Without a tokenizer the pipeline cannot encode prompts; fail into
        # the except branch instead of crashing inside pipeline().
        raise RuntimeError("tokenizer unavailable; cannot build pipeline")

    # BUG FIX: when the model was dispatched with device_map="auto",
    # passing `device=` to pipeline() raises a ValueError in transformers
    # (an accelerate-dispatched model cannot be moved to a single device).
    # Only pin the device explicitly on the plain CPU path.
    pipe_kwargs = {} if use_cuda else {"device": device}
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **pipe_kwargs,
    )
    log("βœ… Pipeline ready for inference")
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")
57
+
58
# ====== Chat Function ======
def chat_with_model(message, history):
    """Generate a reply to *message* given the chat *history*.

    Returns (cleared_textbox, updated_history, log_text), matching the
    Gradio outputs [msg, chatbot, log_box]. History is a list of
    (user, assistant) tuples.
    """
    # Start a fresh per-request log so the panel shows only this turn.
    log_lines.clear()
    log(f"πŸ’­ User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    # BUG FIX: the Clear button resets the chatbot component to None, so the
    # next submit received history=None and the loop below crashed. Normalize
    # any falsy history to an empty list.
    history = history or []

    # Rebuild the full conversation as a plain-text prompt.
    # (typo fix in prompt: "souce" -> "source")
    context = "The following is a conversation between a user and an AI assistant trained on GIT source code.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"

    log("πŸ“„ Built conversation context")
    log(context)

    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f}s")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # The text-generation pipeline echoes the prompt; keep only the new tail.
    reply = output[len(context):].strip()
    # Strip stray markup/noise tokens and collapse runs of whitespace.
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    # Truncate at the first hallucinated next turn.
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()

    log(f"πŸͺ„ Model reply: {reply}")
    history.append((message, reply))
    return "", history, "\n".join(log_lines)
98
+
99
# ===== Gradio =====
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # NOTE(review): the title says "Bhagavad Gita" while the model is
    # described above as trained on git source code — confirm intended branding.
    gr.Markdown("## πŸ’¬ Qwen LoRA-only β€” Bhagavad Gita Assistant")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    # Submit clears the textbox, updates the chat, and refreshes the log panel.
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])

    # BUG FIX: the handler must return exactly one value per output component.
    # The original lambda returned (None, None, "") for only two outputs
    # [chatbot, log_box], which makes Gradio raise when Clear is clicked.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)