Update app.py

app.py CHANGED
@@ -1,34 +1,34 @@
-import …
+import os
+import torch
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
-import os
-
-model_id = "Qwen/Qwen2.5-0.5B-Instruct"
 
-# …
-…
-…
-…
-if "/" not in hub_adapter:
-    hub_adapter = f"{os.getenv('HF_USERNAME', 'rahuldhole')}/{hub_adapter}"
+# ── Config ──
+MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+LOCAL_ADAPTER = "outputs/qwen-fine-tuned"
+HUB_ADAPTER = "rahuldhole/tiny-llm-qwen-adapter"
 
-…
+# Adapter source: local > Hub
+adapter_path = LOCAL_ADAPTER if os.path.exists(LOCAL_ADAPTER) else HUB_ADAPTER
 
 # Device
 device = "cuda" if torch.cuda.is_available() else "cpu"
 if not torch.cuda.is_available() and torch.backends.mps.is_available():
     device = "mps"
 
-print(…)
+print("🧠 Tiny LLM by Rahul Dhole")
+print(f"   Base: {MODEL_ID} | Device: {device} | Adapter: {adapter_path}")
 
-tokenizer = AutoTokenizer.from_pretrained(…)
-model = AutoModelForCausalLM.from_pretrained(…)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
 
 try:
     model = PeftModel.from_pretrained(model, adapter_path)
-    print("✅ Adapter loaded!")
+    print("   ✅ Adapter loaded!")
 except Exception as e:
-    print(f"⚠️ Adapter not loaded ({e}), using base model.")
+    print(f"   ⚠️ Adapter not loaded ({e}), using base model.")
+
 
 def chat(message, history):
     msgs = [{"role": "user", "content": message}]
@@ -37,4 +37,9 @@ def chat(message, history):
     ids = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
     return tokenizer.decode(ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
 
-…
+
+gr.ChatInterface(
+    chat,
+    title="🧠 Tiny LLM",
+    description="Fine-tuned by **Rahul Dhole** • Base model: Qwen2.5-0.5B-Instruct",
+).launch()
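
The diff collapses unchanged lines 35-36 inside chat(). For context, a minimal sketch of how that body typically completes, assuming the standard apply_chat_template flow for Qwen instruct models; the two middle lines below are an assumed reconstruction, since the collapsed lines are not shown in this commit:

def chat(message, history):
    msgs = [{"role": "user", "content": message}]
    # Collapsed in the diff (lines 35-36): render the chat template and
    # tokenize onto the model's device. Assumed, not shown in the commit.
    text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    # Shown in the diff (lines 37-38): generate, then decode only the new tokens
    # by slicing off the prompt portion of the output sequence.
    ids = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True)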
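
A possible follow-up, not part of this commit: once the adapter loads, the LoRA weights can be merged into the base model so generation no longer routes through PEFT wrapper layers. merge_and_unload() is PEFT's standard call for this:

try:
    model = PeftModel.from_pretrained(model, adapter_path)
    # Optional: bake the LoRA deltas into the base weights and drop the
    # PEFT wrapper; the result is a plain transformers model.
    model = model.merge_and_unload()
    print("   ✅ Adapter loaded!")
except Exception as e:
    print(f"   ⚠️ Adapter not loaded ({e}), using base model.")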