rahul7star committed on
Commit
d6704b1
·
verified ·
1 Parent(s): c4a7223

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -34
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py
2
  import gradio as gr
3
- from transformers import pipeline, AutoTokenizer, AutoConfig, AutoModelForCausalLM
4
  import torch
5
  import os
6
  import re
@@ -24,34 +24,46 @@ log("🔍 Initializing model load sequence...")
24
  log(f"Using model: {model_name}")
25
  log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")
26
 
27
- # Inspect model folder (once downloaded from HF cache)
28
  hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
29
  log(f"Model will be loaded from local cache directory: {hf_cache}")
30
 
 
31
  try:
32
- config = AutoConfig.from_pretrained(model_name)
33
  log("✅ Loaded configuration file:")
34
  log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
35
  except Exception as e:
36
  log(f"⚠️ Could not read model config: {e}")
37
 
 
38
  try:
39
- tokenizer = AutoTokenizer.from_pretrained(model_name)
40
  log("✅ Tokenizer loaded successfully.")
41
  log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
42
- log(f"Tokenizer files found in: {tokenizer.pretrained_vocab_files_map}")
43
  except Exception as e:
44
  log(f"⚠️ Could not load tokenizer: {e}")
45
 
46
- # Load model pipeline
47
- start_load = time.time()
48
- pipe = pipeline(
49
- "text-generation",
50
- model=model_name,
51
- device=device,
52
- )
53
- log(f" Model pipeline loaded in {time.time() - start_load:.2f} seconds.")
54
- print(pipe.model.config._name_or_path)
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  # ====== Chat Function ======
@@ -60,6 +72,9 @@ def chat_with_model(message, history):
60
  log("💭 Starting chat generation process...")
61
  log(f"User message: {message}")
62
 
 
 
 
63
  # 1️⃣ Build conversation context
64
  context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
65
  for user, bot in history:
@@ -68,42 +83,45 @@ def chat_with_model(message, history):
68
  log("📄 Built conversation context:")
69
  log(context)
70
 
71
- # 2️⃣ Encode and run model
72
  log("🧠 Encoding input and generating response...")
73
  start_time = time.time()
74
- output = pipe(
75
- context,
76
- max_new_tokens=200,
77
- do_sample=True,
78
- temperature=0.7,
79
- top_p=0.9,
80
- repetition_penalty=1.1,
81
- truncation=True,
82
- )[0]["generated_text"]
83
- log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
84
-
85
- # 3️⃣ Extract clean assistant reply
 
 
 
 
86
  reply = output[len(context):].strip()
87
  reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
88
  reply = re.sub(r"\s{2,}", " ", reply).strip()
89
  reply = reply.split("User:")[0].split("Assistant:")[0].strip()
90
 
91
- log("🪄 Raw model output processed successfully.")
92
- log(f"Model reply (cleaned): {reply}")
93
 
94
- # 4️⃣ Log tokenizer + model folders
95
  try:
96
- model_dir = pipe.model.name_or_path
97
- log(f"📂 Model files are read from: {model_dir}")
98
  if os.path.exists(model_dir):
99
  for root, dirs, files in os.walk(model_dir):
100
- for file in files[:5]: # show first 5 files only
101
  log(f" - {os.path.join(root, file)}")
102
  break
103
  except Exception as e:
104
  log(f"⚠️ Could not list model folder files: {e}")
105
 
106
- # 5️⃣ Finalize
107
  history.append((message, reply))
108
  return "", history, "\n".join(log_lines)
109
 
 
1
  # app.py
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
4
  import torch
5
  import os
6
  import re
 
24
  log(f"Using model: {model_name}")
25
  log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")
26
 
 
27
  hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
28
  log(f"Model will be loaded from local cache directory: {hf_cache}")
29
 
30
+ # ====== Load Config ======
31
  try:
32
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
33
  log("✅ Loaded configuration file:")
34
  log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
35
  except Exception as e:
36
  log(f"⚠️ Could not read model config: {e}")
37
 
38
+ # ====== Load Tokenizer ======
39
  try:
40
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
41
  log("✅ Tokenizer loaded successfully.")
42
  log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
 
43
  except Exception as e:
44
  log(f"⚠️ Could not load tokenizer: {e}")
45
 
46
+ # ====== Load Model ======
47
+ try:
48
+ start_load = time.time()
49
+ model = AutoModelForCausalLM.from_pretrained(
50
+ model_name,
51
+ trust_remote_code=True,
52
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
53
+ device_map="auto" if torch.cuda.is_available() else None,
54
+ )
55
+ pipe = pipeline(
56
+ "text-generation",
57
+ model=model,
58
+ tokenizer=tokenizer,
59
+ device=device,
60
+ )
61
+ log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.")
62
+ log(f"📂 Actual model source: {model.name_or_path}")
63
+ log(f"🧩 Architecture: {model.config.architectures if hasattr(model.config, 'architectures') else 'Unknown'}")
64
+ except Exception as e:
65
+ log(f"❌ Model failed to load: {e}")
66
+ pipe = None
67
 
68
 
69
  # ====== Chat Function ======
 
72
  log("💭 Starting chat generation process...")
73
  log(f"User message: {message}")
74
 
75
+ if pipe is None:
76
+ return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."
77
+
78
  # 1️⃣ Build conversation context
79
  context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
80
  for user, bot in history:
 
83
  log("📄 Built conversation context:")
84
  log(context)
85
 
86
+ # 2️⃣ Generate response
87
  log("🧠 Encoding input and generating response...")
88
  start_time = time.time()
89
+ try:
90
+ output = pipe(
91
+ context,
92
+ max_new_tokens=200,
93
+ do_sample=True,
94
+ temperature=0.7,
95
+ top_p=0.9,
96
+ repetition_penalty=1.1,
97
+ truncation=True,
98
+ )[0]["generated_text"]
99
+ log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
100
+ except Exception as e:
101
+ log(f"❌ Generation failed: {e}")
102
+ return "", history, "\n".join(log_lines)
103
+
104
+ # 3️⃣ Extract and clean model reply
105
  reply = output[len(context):].strip()
106
  reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
107
  reply = re.sub(r"\s{2,}", " ", reply).strip()
108
  reply = reply.split("User:")[0].split("Assistant:")[0].strip()
109
 
110
+ log("🪄 Cleaned model output successfully.")
111
+ log(f"Model reply: {reply}")
112
 
113
+ # 4️⃣ Log model folder files
114
  try:
115
+ model_dir = model.name_or_path
116
+ log(f"📁 Model files read from: {model_dir}")
117
  if os.path.exists(model_dir):
118
  for root, dirs, files in os.walk(model_dir):
119
+ for file in files[:5]:
120
  log(f" - {os.path.join(root, file)}")
121
  break
122
  except Exception as e:
123
  log(f"⚠️ Could not list model folder files: {e}")
124
 
 
125
  history.append((message, reply))
126
  return "", history, "\n".join(log_lines)
127