prakhardoneria committed on
Commit
282328e
·
verified ·
1 Parent(s): c5a5582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,37 +1,38 @@
1
  import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
3
  import gradio as gr
4
 
5
- # Load model (automatically downloaded and cached by Hugging Face)
6
- model_id = "mistralai/Mistral-7B-Instruct-v0.1"
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
12
  device_map="auto"
13
  )
14
 
15
  streamer = TextStreamer(tokenizer, skip_prompt=True)
16
 
17
- # Simple chat loop
18
  def chat(message, history):
19
  prompt = ""
20
  for user, bot in history:
21
- prompt += f"[INST] {user.strip()} [/INST] {bot.strip()} "
22
- prompt += f"[INST] {message.strip()} [/INST]"
23
 
24
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
25
- output = model.generate(
26
  **inputs,
27
  max_new_tokens=256,
28
  temperature=0.7,
 
29
  do_sample=True,
30
- top_p=0.95,
31
  pad_token_id=tokenizer.eos_token_id
32
  )
33
- result = tokenizer.decode(output[0], skip_special_tokens=True).split("[/INST]")[-1].strip()
34
- return result
 
35
 
36
- # Launch Gradio app
37
- gr.ChatInterface(fn=chat, title="Mistral Chat (CPU)", description="Ask questions, get answers using a real LLM.").launch()
 
1
  import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
3
  import gradio as gr
4
 
5
# Lightweight, publicly hosted chat model — no gated access needed.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # full precision; safe on CPU-only hosts
    device_map="auto",
)

# Streams decoded tokens to stdout as they are generated.
# NOTE(review): `streamer` is never passed to model.generate() below —
# presumably left over from an earlier revision; confirm before removing.
streamer = TextStreamer(tokenizer, skip_prompt=True)
16
 
17
# Chat prompt formatting + generation
def chat(message, history):
    """Generate a reply to *message* given the prior conversation.

    Parameters:
        message: the new user utterance (str).
        history: list of (user, bot) string pairs from earlier turns.

    Returns:
        The model's newly generated reply text, stripped of whitespace.
    """
    # Rebuild the conversation in TinyLlama's chat markup.
    # "".join avoids the quadratic cost of repeated string +=.
    turns = []
    for user, bot in history:
        turns.append(f"<|user|>\n{user.strip()}\n<|assistant|>\n{bot.strip()}\n")
    turns.append(f"<|user|>\n{message.strip()}\n<|assistant|>\n")
    prompt = "".join(turns)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the tokens generated AFTER the prompt. The previous approach
    # — splitting the fully decoded text on "<|assistant|>" — is fragile:
    # skip_special_tokens=True can strip that marker from the decoded string,
    # in which case the split returns the entire conversation instead of the
    # reply. Slicing by prompt length is robust to that.
    prompt_len = inputs["input_ids"].shape[1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return reply.strip()
36
 
37
# Build and start the Gradio chat UI (blocks until the server stops).
gr.ChatInterface(
    fn=chat,
    title="TinyLlama Chat",
    description="Lightweight local LLM (1.1B)",
).launch()