import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Title for UI
st.title("💬 DeepSeek Math Assistant")

# Model and tokenizer setup
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

@st.cache_resource
def load_model_and_tokenizer():
    # Cache the model and tokenizer across Streamlit reruns so they load only once
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",  # places weights on GPU if available, else CPU
        trust_remote_code=True,
    )
    return tokenizer, model

tokenizer, model = load_model_and_tokenizer()

# User input
user_input = st.text_area(
    "Ask a math question:",
    height=100,
    placeholder="e.g. What's the integral of x^2?",
)
generate_button = st.button("Generate Answer")

# Generation settings. max_new_tokens bounds only the generated text;
# the original max_length also counts the prompt and can truncate answers.
# num_beams=5 combined with do_sample=True was dropped: it triggers slow
# beam-search sampling, which is rarely intended for an interactive app.
gen_config = GenerationConfig(
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    do_sample=True,
)

if generate_button and user_input.strip():
    # Prepend a system prompt to keep the model focused on math questions
    prompt = (
        "You are a specialized math assistant. Only answer math-related questions.\n"
        f"User: {user_input}\nAssistant:"
    )

    # Tokenize and move tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        generation_config=gen_config,
    )

    # Decode the full sequence, then keep only the assistant's answer
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()

    # Show response
    st.markdown(f"**🧠 Answer:** {response}")
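
# Run with (assuming the file is saved as app.py; the filename is arbitrary):
#   streamlit run app.py

# ---------------------------------------------------------------------------
# Optional extension, a minimal sketch rather than part of the original app:
# stream tokens to the UI as they are generated instead of blocking until
# model.generate() returns. It relies on transformers' TextIteratorStreamer
# and Streamlit's st.write_stream (available in Streamlit >= 1.31); the
# helper name generate_streaming is hypothetical.
from threading import Thread

from transformers import TextIteratorStreamer

def generate_streaming(prompt: str):
    """Yield decoded text chunks as the model produces them."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a background thread; the streamer yields decoded
    # text chunks on this thread as tokens arrive. Note that streaming
    # requires greedy or sampled decoding (num_beams=1, the default).
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256),
    ).start()
    yield from streamer

# Usage: inside the button handler, replace the generate/decode lines with
#     st.write_stream(generate_streaming(prompt))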