Spaces:
Sleeping
Sleeping
import os

import streamlit as st
import torch
from dotenv import load_dotenv
# NOTE: BitsAndBytesConfig is exported by `transformers`, not by the
# `bitsandbytes` package itself — importing it from bitsandbytes raises
# ImportError at startup (bitsandbytes is only the quantization backend).
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load environment variables from a local .env file (no-op on Spaces,
# where secrets arrive via st.secrets / real env vars instead).
load_dotenv()
# Streamlit page configuration
st.set_page_config(page_title="Algorizz C++ Solver", page_icon="💻")
st.title("Algorizz C++ Solver")
st.markdown("Paste a C++ question, and I'll generate the C++ code solution!")

# Resolve the Hugging Face access token. Locally it comes from the
# environment (possibly via .env); on Spaces, st.secrets takes precedence.
hf_token = os.getenv("HF_TOKEN")
# NOTE(review): HF_HOME can also be set on a developer machine — presumably
# this is meant to detect Hugging Face Spaces; confirm whether SPACE_ID
# would be the stricter signal.
is_hf_spaces = os.getenv("HF_HOME") is not None
try:
    # Short-circuit keeps st.secrets untouched when not running on Spaces.
    if is_hf_spaces and "HF_TOKEN" in st.secrets:
        hf_token = st.secrets["HF_TOKEN"]
except Exception as e:
    st.warning(f"Could not access st.secrets in Hugging Face Spaces: {str(e)}. Falling back to HF_TOKEN environment variable.")
# Cache the model and tokenizer so they are loaded once per process —
# without this decorator the multi-GB model is reloaded on every
# Streamlit rerun (every user interaction), despite the original intent.
@st.cache_resource
def load_model_and_tokenizer(model_id, hf_token, is_hf_spaces):
    """Load the causal LM and tokenizer for ``model_id`` from the HF Hub.

    Args:
        model_id: Hugging Face Hub repository id of the model.
        hf_token: access token for gated/private repos (may be None).
        is_hf_spaces: when True, load weights 4-bit quantized (NF4 via
            bitsandbytes, CUDA/Linux only); when False, load fp16 and
            offload to a local ``offload`` folder instead.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Quantize only on Spaces, where the bitsandbytes backend is available;
    # locally we fall back to full fp16 with disk offload.
    quantization_config = None
    if is_hf_spaces:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=False,
        )
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        # Disk offload only makes sense locally; on Spaces the 4-bit
        # weights are expected to fit in device memory.
        offload_folder=None if is_hf_spaces else "offload",
        offload_state_dict=not is_hf_spaces,
        token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    return model, tokenizer
# Load the model and tokenizer; a failure here is fatal for the app,
# so surface the error and halt the script run.
model_id = "not0w4i5/algorizz"
try:
    model, tokenizer = load_model_and_tokenizer(model_id, hf_token, is_hf_spaces)
except Exception as e:
    st.error(f"Failed to load model: {str(e)}")
    st.stop()

# Some causal LMs ship without a pad token; fall back to EOS for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Chat history lives in session state so it survives Streamlit reruns.
st.session_state.setdefault("messages", [])
# Re-render the full chat history on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("Enter your C++ question (e.g., 'Find the sum of two numbers')"):
    # Record and echo the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate C++ code
    with st.chat_message("assistant"):
        with st.spinner("Generating C++ code..."):
            # Prompt template the model was tuned on; the trailing marker
            # is also used below to strip the echoed prompt from the output.
            formatted_prompt = f"Problem: {prompt}\nGenerate a C++ solution only (no Java or other languages):\nSolution in C++:"
            inputs = tokenizer(
                formatted_prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            ).to("cuda" if torch.cuda.is_available() else "cpu")
            # BUGFIX: the original used max_length=256, which counts the
            # prompt tokens too — a prompt longer than 256 tokens (the
            # tokenizer allows up to 512) would yield no new tokens at all.
            # max_new_tokens bounds only the generated continuation.
            with torch.no_grad():  # inference only — skip autograd bookkeeping
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=256,  # kept small for faster generation
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.5,  # more deterministic for faster generation
                    top_p=0.7,  # focus on high-probability tokens
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )
            # Decode, then strip everything up to (and including) the
            # prompt's "Solution in C++:" marker.
            generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
            marker = "Solution in C++:"
            marker_at = generated_code.find(marker)
            # Guard against the marker having been truncated out of the
            # decoded text; find() == -1 would otherwise slice from a
            # bogus offset and silently drop most of the output.
            if marker_at != -1:
                code = generated_code[marker_at + len(marker):].strip()
            else:
                code = generated_code.strip()
            # Post-process: cut at the first sign of non-C++ (Java) output.
            java_markers = ["Solution in Java:", "Solution in JAVA:", "public class", "import java", "int[]", "ArrayList"]
            for marker in java_markers:
                if marker in code:
                    code = code[:code.find(marker)].strip()
            # Display the code and persist the assistant turn.
            st.code(code, language="cpp")
            st.session_state.messages.append({"role": "assistant", "content": code})