rahul7star committed on
Commit
da321c2
·
verified ·
1 Parent(s): d8e2e58

Update app1.py

Browse files
Files changed (1) hide show
  1. app1.py +34 -90
app1.py CHANGED
@@ -1,129 +1,76 @@
1
- # app.py
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
4
- from peft import PeftModel
5
  import torch
6
  import os
7
  import re
8
  import json
9
  import time
10
  from datetime import datetime
11
- from huggingface_hub import hf_hub_download, model_info
12
 
13
- # ====== Settings ======
14
  device = 0 if torch.cuda.is_available() else -1
15
- base_model_name = "Qwen/Qwen2.5-0.5B" # Base model for LoRA
16
- finetuned_repo = "rahul7star/Qwen2.5-3B-Gita"
17
 
18
  log_lines = []
19
 
20
  def log(msg):
21
- """Append timestamped message to log."""
22
  line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
23
  print(line)
24
  log_lines.append(line)
25
 
26
- # ====== Start Logging ======
27
- log("🔍 Initializing model load sequence...")
28
- log(f"Base model: {base_model_name}")
29
- log(f"Fine-tuned LoRA repo: {finetuned_repo}")
30
- log(f"Device detected: {'GPU' if device==0 else 'CPU'}")
31
- hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
32
- log(f"Model cache directory: {hf_cache}")
33
 
34
- # ====== Inspect Hugging Face repo ======
35
  try:
36
- info = model_info(finetuned_repo)
37
- log("📦 Hugging Face repo info loaded:")
38
- log(f" - Model ID: {info.id}")
39
- log(f" - Private: {info.private}")
40
- log(f" - Last modified: {info.last_modified}")
41
- log(f" - Files count: {len(info.siblings)}")
42
- for s in info.siblings[:5]:
43
- log(f" · {s.rfilename}")
44
- except Exception as e:
45
- log(f"⚠️ Could not fetch model info: {e}")
46
-
47
- # ====== Load base model and tokenizer ======
48
- try:
49
- tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
50
  if tokenizer.pad_token is None:
51
  tokenizer.pad_token = tokenizer.eos_token
52
  log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
53
  except Exception as e:
54
- log(f"❌ Failed to load tokenizer: {e}")
55
  tokenizer = None
56
 
 
 
 
57
  try:
58
- base_model = AutoModelForCausalLM.from_pretrained(
59
- base_model_name,
60
  trust_remote_code=True,
61
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
62
  device_map="auto" if torch.cuda.is_available() else None,
63
  )
64
- log(f"✅ Base model loaded from {base_model_name}")
65
- except Exception as e:
66
- log(f"❌ Failed to load base model: {e}")
67
- base_model = None
68
-
69
- # ====== Load fine-tuned LoRA weights ======
70
- model = None
71
- pipe = None
72
- try:
73
- if base_model is not None:
74
- model = PeftModel.from_pretrained(base_model, finetuned_repo)
75
- model.eval()
76
- log(f"✅ LoRA fine-tuned model loaded from {finetuned_repo}")
77
- log(f"🧩 Model architecture: {getattr(model.config, 'architectures', ['Unknown'])}")
78
-
79
- pipe = pipeline(
80
- "text-generation",
81
- model=model,
82
- tokenizer=tokenizer,
83
- device=device,
84
- )
85
- log("✅ Pipeline ready for inference")
86
  except Exception as e:
87
- log(f"❌ Failed to load LoRA model: {e}")
88
-
89
- # ====== Try to extract training info ======
90
- def extract_training_info(repo_name):
91
- data = {}
92
- try:
93
- readme_path = hf_hub_download(repo_name, filename="README.md")
94
- with open(readme_path, "r", encoding="utf-8") as f:
95
- text = f.read()
96
- matches = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", text, re.I)
97
- if matches:
98
- data["readme_mentions"] = matches[:5]
99
- log(f"✅ README mentions dataset/fine-tune: {matches[:5]}")
100
- else:
101
- log("ℹ️ No dataset reference found in README")
102
- except Exception as e:
103
- log(f"⚠️ README not found or unreadable: {e}")
104
- return data
105
-
106
- training_info = extract_training_info(finetuned_repo)
107
 
108
  # ====== Chat Function ======
109
  def chat_with_model(message, history):
110
  log_lines.clear()
111
- log("💭 Starting chat generation...")
112
- log(f"User message: {message}")
113
 
114
  if pipe is None:
115
- return "", history, "⚠️ Model pipeline not loaded. Check logs."
116
 
117
- # Build context
118
- context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
119
  for user, bot in history:
120
  context += f"User: {user}\nAssistant: {bot}\n"
121
  context += f"User: {message}\nAssistant:"
122
- log("📄 Context built:")
 
123
  log(context)
124
 
125
- # Generate
126
- log("🧠 Generating response...")
127
  start_time = time.time()
128
  try:
129
  output = pipe(
@@ -139,26 +86,24 @@ def chat_with_model(message, history):
139
  log(f"❌ Generation failed: {e}")
140
  return "", history, "\n".join(log_lines)
141
 
142
- # Clean output
143
  reply = output[len(context):].strip()
144
  reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
145
  reply = re.sub(r"\s{2,}", " ", reply).strip()
146
  reply = reply.split("User:")[0].split("Assistant:")[0].strip()
147
 
148
- log("🪄 Output cleaned successfully")
149
- log(f"Model reply: {reply}")
150
-
151
  history.append((message, reply))
152
  return "", history, "\n".join(log_lines)
153
 
154
- # ====== Gradio Interface ======
155
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
156
- gr.Markdown("## 💬 Qwen GitaLoRA Fine-tuned Conversational Assistant")
157
 
158
  with gr.Row():
159
  with gr.Column(scale=2):
160
  chatbot = gr.Chatbot(height=500)
161
- msg = gr.Textbox(placeholder="Ask about the Gita, life, or philosophy...", label="Your Message")
162
  clear = gr.Button("Clear")
163
  with gr.Column(scale=1):
164
  log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)
@@ -166,6 +111,5 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
166
  msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
167
  clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)
168
 
169
- # ====== Launch ======
170
  if __name__ == "__main__":
171
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # app.py (LoRA-only loading)
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
 
4
  import torch
5
  import os
6
  import re
7
  import json
8
  import time
9
  from datetime import datetime
10
+ from huggingface_hub import model_info
11
 
12
# ===== Settings =====
# transformers pipeline device index: 0 = first CUDA GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1
lora_repo = "rahul7star/Qwen2.5-3B-Gita"  # ONLY LoRA fine-tuned repo
 
15
 
16
# In-memory buffer of every log line; shown verbatim in the UI log box and
# cleared at the start of each chat turn.
log_lines = []

def log(msg):
    """Echo *msg* to stdout with an HH:MM:SS timestamp and buffer it in log_lines."""
    stamped = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    log_lines.append(stamped)
22
 
23
+ log(f"🚀 Loading LoRA-only model from {lora_repo}")
24
+ log(f"Device: {'GPU' if device==0 else 'CPU'}")
 
 
 
 
 
25
 
26
# ====== Tokenizer ======
# NOTE(review): trust_remote_code executes code shipped with the repo — the
# repo is trusted here, but keep that in mind if lora_repo ever changes.
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    # Some causal-LM tokenizers ship without a pad token; reuse EOS so padded
    # batches and generation do not fail downstream.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    # Best-effort startup: record the failure and continue with tokenizer=None
    # (the chat handler reports an unusable pipeline instead of crashing).
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None
35
 
36
# ====== LoRA-only model ======
# Assumes lora_repo holds a standalone causal-LM checkpoint (merged weights,
# not a bare PEFT adapter) — TODO confirm; a bare adapter would fail here.
model = None
pipe = None
try:
    use_gpu = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,  # NOTE(review): runs repo-shipped code; repo is trusted here
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        device_map="auto" if use_gpu else None,
    )
    model.eval()  # inference only: disable dropout / training-mode behavior
    log("✅ LoRA-only model loaded successfully")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # BUGFIX: when the model was dispatched via accelerate with
        # device_map="auto", also passing an explicit device to pipeline()
        # conflicts (recent transformers raises ValueError). Only pin the
        # device index on the CPU path, where no device_map was used.
        device=None if use_gpu else device,
    )
    log("✅ Pipeline ready for inference")
except Exception as e:
    # Best-effort startup: leave pipe=None so the chat handler can report
    # "pipeline not loaded" instead of the app crashing at import time.
    log(f"❌ LoRA model load failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  # ====== Chat Function ======
59
  def chat_with_model(message, history):
60
  log_lines.clear()
61
+ log(f"💭 User message: {message}")
 
62
 
63
  if pipe is None:
64
+ return "", history, "⚠️ Model pipeline not loaded."
65
 
66
+ context = "The following is a conversation between a user and an AI assistant trained on Bhagavad Gita excerpts.\n"
 
67
  for user, bot in history:
68
  context += f"User: {user}\nAssistant: {bot}\n"
69
  context += f"User: {message}\nAssistant:"
70
+
71
+ log("📄 Built conversation context")
72
  log(context)
73
 
 
 
74
  start_time = time.time()
75
  try:
76
  output = pipe(
 
86
  log(f"❌ Generation failed: {e}")
87
  return "", history, "\n".join(log_lines)
88
 
89
+ # Clean reply
90
  reply = output[len(context):].strip()
91
  reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
92
  reply = re.sub(r"\s{2,}", " ", reply).strip()
93
  reply = reply.split("User:")[0].split("Assistant:")[0].strip()
94
 
95
+ log(f"🪄 Model reply: {reply}")
 
 
96
  history.append((message, reply))
97
  return "", history, "\n".join(log_lines)
98
 
99
# ===== Gradio =====
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # FIX: original title read "LoRA-onlyBhagavad" (missing space).
    gr.Markdown("## 💬 Qwen LoRA-only Bhagavad Gita Assistant")

    with gr.Row():
        with gr.Column(scale=2):
            # Left column: the conversation itself.
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            # Right column: per-turn diagnostic log, rebuilt on every request.
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    # chat_with_model returns (cleared textbox, updated history, joined log text).
    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    clear.click(lambda: (None, None, ""), None, [chatbot, log_box], queue=False)
113
 
 
114
if __name__ == "__main__":
    # Bind 0.0.0.0:7860 so the app is reachable from outside the container
    # (the conventional Hugging Face Spaces host/port).
    demo.launch(server_name="0.0.0.0", server_port=7860)