rahuldhole committed on
Commit
13b2e32
·
verified ·
1 Parent(s): 0ddbaa5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -17
app.py CHANGED
@@ -1,34 +1,34 @@
1
- import torch, gradio as gr
 
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from peft import PeftModel
4
- import os
5
-
6
- model_id = "Qwen/Qwen2.5-0.5B-Instruct"
7
 
8
- # Adapter source: local folder first, then Hub repo
9
- local_adapter = "outputs/qwen-fine-tuned"
10
- hub_adapter = os.getenv("HF_MODEL_NAME", "rahuldhole/tiny-llm-qwen-adapter")
11
- # Prefix with username if it's just a name
12
- if "/" not in hub_adapter:
13
- hub_adapter = f"{os.getenv('HF_USERNAME', 'rahuldhole')}/{hub_adapter}"
14
 
15
- adapter_path = local_adapter if os.path.exists(local_adapter) else hub_adapter
 
16
 
17
  # Device
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
19
  if not torch.cuda.is_available() and torch.backends.mps.is_available():
20
  device = "mps"
21
 
22
- print(f"Device: {device} | Adapter: {adapter_path}")
 
23
 
24
- tokenizer = AutoTokenizer.from_pretrained(model_id)
25
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
26
 
27
  try:
28
  model = PeftModel.from_pretrained(model, adapter_path)
29
- print("✅ Adapter loaded!")
30
  except Exception as e:
31
- print(f"⚠️ Adapter not loaded ({e}), using base model.")
 
32
 
33
  def chat(message, history):
34
  msgs = [{"role": "user", "content": message}]
@@ -37,4 +37,9 @@ def chat(message, history):
37
  ids = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
38
  return tokenizer.decode(ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
39
 
40
- gr.ChatInterface(chat, title="Tiny LLM Chat", description="Chat with a fine-tuned Qwen 0.5B model").launch()
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
  from peft import PeftModel
 
 
 
6
 
7
+ # ── Config ──
8
+ MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
9
+ LOCAL_ADAPTER = "outputs/qwen-fine-tuned"
10
+ HUB_ADAPTER = "rahuldhole/tiny-llm-qwen-adapter"
 
 
11
 
12
+ # Adapter source: local > Hub
13
+ adapter_path = LOCAL_ADAPTER if os.path.exists(LOCAL_ADAPTER) else HUB_ADAPTER
14
 
15
  # Device
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
  if not torch.cuda.is_available() and torch.backends.mps.is_available():
18
  device = "mps"
19
 
20
+ print("🧠 Tiny LLM by Rahul Dhole")
21
+ print(f" Base: {MODEL_ID} | Device: {device} | Adapter: {adapter_path}")
22
 
23
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
24
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
25
 
26
  try:
27
  model = PeftModel.from_pretrained(model, adapter_path)
28
+ print(" ✅ Adapter loaded!")
29
  except Exception as e:
30
+ print(f" ⚠️ Adapter not loaded ({e}), using base model.")
31
+
32
 
33
  def chat(message, history):
34
  msgs = [{"role": "user", "content": message}]
 
37
  ids = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
38
  return tokenizer.decode(ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
39
 
40
+
41
+ gr.ChatInterface(
42
+ chat,
43
+ title="🧠 Tiny LLM",
44
+ description="Fine-tuned by **Rahul Dhole** • Base model: Qwen2.5-0.5B-Instruct",
45
+ ).launch()