# Second_Try_Cag / app.py
# (HuggingFace Space page header — kouki321, "Update app.py", commit c55d186 verified)
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
# Page title shown at the top of the Streamlit UI.
st.title("💬 DeepSeek Math Assistant")

# Model and tokenizer configuration.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Preferred device: CUDA when a GPU is visible, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
@st.cache_resource
def load_model_and_tokenizer():
    """Load the tokenizer and causal-LM model once per Streamlit session.

    Cached with st.cache_resource so script reruns reuse the same objects
    instead of reloading the weights from disk on every interaction.

    Returns:
        tuple: (tokenizer, model) for the module-level ``model_name``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # Reuse the module-level `device` decision instead of probing
        # torch.cuda.is_available() a second time.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",  # let accelerate place layers on the available device(s)
        trust_remote_code=True,
    )
    return tokenizer, model
tokenizer, model = load_model_and_tokenizer()

# User input widgets.
user_input = st.text_area(
    "Ask a math question:",
    height=100,
    placeholder="e.g. What's the integral of x^2?",
)
generate_button = st.button("Generate Answer")

# Generation settings.
gen_config = GenerationConfig(
    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=256 counted the prompt too, so a long question could leave
    # almost no room for the answer.
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    # num_beams=5 together with do_sample=True was beam-sample decoding,
    # which is deprecated in transformers and ~5x slower per request;
    # plain nucleus sampling (num_beams=1, the default) is the intended mode.
    do_sample=True,
)
if generate_button and user_input.strip() != "":
    # System prompt to steer the model toward math-only answers.
    prompt = f"You are a specialized math assistant. Only answer math-related questions.\nUser: {user_input}\nAssistant:"

    # Tokenize and move tensors to wherever `device_map="auto"` placed the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # inference_mode disables autograd bookkeeping (generation never needs
    # gradients); the spinner gives feedback during the slow generate call.
    with st.spinner("Thinking..."), torch.inference_mode():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs.get("attention_mask", None),
            generation_config=gen_config,
            # Explicit pad token silences the "pad_token_id not set" warning
            # for open-ended generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence, then keep only the assistant's turn.
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()

    # Show response.
    st.markdown(f"**🧠 Answer:** {response}")