# StudentHelper / app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
LORA_PATH = "./"  # LoRA adapter files sit in the Space root, next to app.py
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # no dedicated pad token; reuse EOS for padding
# Base model (CPU)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,  # full precision: fp16 ops are slow or unsupported on most CPUs
    device_map={"": "cpu"},
    low_cpu_mem_usage=True,
)
# Load LoRA
model = PeftModel.from_pretrained(model, LORA_PATH)
model.eval()
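# Optional (an assumption, not in the original app): folding the LoRA deltas into the
# base weights removes the per-layer adapter indirection, which can speed up CPU
# inference. PeftModel.merge_and_unload() returns a plain transformers model.
# model = model.merge_and_unload()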
def chat(user_prompt, max_tokens, temperature):
    prompt = f"""
You are a lab assistant.
Answer in **Markdown** format.
Use headings, bullet points, and code blocks when appropriate.
Question:
{user_prompt}
Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,  # sampling honors the Temperature slider; greedy (do_sample=False) is faster on CPU
            temperature=float(temperature),
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt
    generated = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
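
# A sketch under assumptions (not part of the original app): a streaming variant of
# chat() built on transformers' TextIteratorStreamer, so partial text reaches the UI
# while tokens are still being generated. Gradio streams output when the handler is
# a generator that yields successively longer strings. chat_stream is a hypothetical
# name; to use it, pass fn=chat_stream to gr.Interface below.
from threading import Thread
from transformers import TextIteratorStreamer

def chat_stream(user_prompt, max_tokens, temperature):
    prompt = f"You are a lab assistant.\nQuestion:\n{user_prompt}\nAnswer:\n"  # abbreviated template for the sketch
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(
        **inputs,
        max_new_tokens=int(max_tokens),
        do_sample=True,
        temperature=float(temperature),
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() blocks, so run it on a worker thread while the main thread drains the streamer
    Thread(target=model.generate, kwargs=kwargs).start()
    partial = ""
    for piece in streamer:
        partial += piece
        yield partial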
# Gradio UI
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(32, 512, value=256, step=32, label="Max tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Markdown(label="Answer"),
    title="DeepSeek Lab Assistant (LoRA)",
)
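# Optional (an assumption, not in the original app): on a shared CPU Space a request
# queue stops concurrent users from running generate() on the same model at once.
# gr.Interface inherits queue() from gr.Blocks and serializes requests through it.
# demo = demo.queue()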
if __name__ == "__main__":
    demo.launch()