# Second_Try_Cag / app.py
# (HuggingFace Space page header — kouki321, "Update app.py", commit c55d186 verified)
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
# Page title shown at the top of the Streamlit UI.
st.title("💬 DeepSeek Math Assistant")

# Model and tokenizer configuration.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Preferred device: CUDA when a GPU is visible, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
@st.cache_resource
def load_model_and_tokenizer():
    """Load the tokenizer and causal-LM model once per Streamlit session.

    Cached with st.cache_resource so script reruns reuse the same objects
    instead of reloading the weights from disk on every interaction.

    Returns:
        tuple: (tokenizer, model) for the module-level ``model_name``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # Reuse the module-level `device` decision instead of probing
        # torch.cuda.is_available() a second time.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",  # let accelerate place layers on the available device(s)
        trust_remote_code=True,
    )
    return tokenizer, model
tokenizer, model = load_model_and_tokenizer()

# User input widgets.
user_input = st.text_area(
    "Ask a math question:",
    height=100,
    placeholder="e.g. What's the integral of x^2?",
)
generate_button = st.button("Generate Answer")

# Generation settings.
gen_config = GenerationConfig(
    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=256 counted the prompt too, so a long question could leave
    # almost no room for the answer.
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    # num_beams=5 together with do_sample=True was beam-sample decoding,
    # which is deprecated in transformers and ~5x slower per request;
    # plain nucleus sampling (num_beams=1, the default) is the intended mode.
    do_sample=True,
)
if generate_button and user_input.strip() != "":
    # System prompt to steer the model toward math-only answers.
    prompt = f"You are a specialized math assistant. Only answer math-related questions.\nUser: {user_input}\nAssistant:"

    # Tokenize and move tensors to wherever `device_map="auto"` placed the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # inference_mode disables autograd bookkeeping (generation never needs
    # gradients); the spinner gives feedback during the slow generate call.
    with st.spinner("Thinking..."), torch.inference_mode():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs.get("attention_mask", None),
            generation_config=gen_config,
            # Explicit pad token silences the "pad_token_id not set" warning
            # for open-ended generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence, then keep only the assistant's turn.
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()

    # Show response.
    st.markdown(f"**🧠 Answer:** {response}")