import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
LORA_PATH = "./"
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
# Base model (CPU)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
    low_cpu_mem_usage=True,
)
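# Optional tweak (not from the original script): on recent PyTorch builds,
# torch_dtype=torch.bfloat16 can roughly halve memory use on CPU at some cost
# in numerical precision; float32 remains the safe default used here.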
# Load LoRA
model = PeftModel.from_pretrained(model, LORA_PATH)
model.eval()
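# Optional sketch: merging the LoRA adapter into the base weights folds away
# PEFT's extra adapter matmuls, which can speed up CPU inference. This assumes
# enough RAM for the merged weights and returns a plain transformers model:
# model = model.merge_and_unload()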
def chat(user_prompt, max_tokens, temperature):
    prompt = f"""
You are a lab assistant.
Answer in **Markdown** format.
Use headings, bullet points, and code blocks when appropriate.
Question:
{user_prompt}
Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=False,  # greedy decoding: fast on CPU (the temperature slider is currently unused)
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
        )
    # Decode only the newly generated tokens, skipping the prompt
    generated = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
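# Quick sanity check without the UI (hypothetical prompt, for local testing):
# print(chat("How do I calibrate a pipette?", max_tokens=128, temperature=0.7))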
# Gradio UI
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(32, 512, value=256, step=32, label="Max tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Markdown(label="Answer"),
    title="DeepSeek Lab Assistant (LoRA)",
)
if __name__ == "__main__":
    demo.launch()
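    # When running outside a hosted Space, Gradio can expose a temporary public
    # URL via demo.launch(share=True); an optional tweak, not required here.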