# algorizz-chat / noapp.py
# not0w4i5's picture
# Rename app.py to noapp.py
# 225faa2 verified
# NOTE(review): the four lines above are Hugging Face web-page residue that
# was pasted into the file; commented out so the module parses as Python.
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from dotenv import load_dotenv
from bitsandbytes import BitsAndBytesConfig
# Load environment variables from .env file
load_dotenv()
# Streamlit page configuration
st.set_page_config(page_title="Algorizz C++ Solver", page_icon="💻")
st.title("Algorizz C++ Solver")
st.markdown("Paste a C++ question, and I'll generate the C++ code solution!")
# Get the Hugging Face token (support both local and Hugging Face Spaces)
hf_token = os.getenv("HF_TOKEN") # Default to environment variable
is_hf_spaces = os.getenv("HF_HOME") is not None # Detect Hugging Face Spaces
if is_hf_spaces:
try:
if "HF_TOKEN" in st.secrets:
hf_token = st.secrets["HF_TOKEN"]
except Exception as e:
st.warning(f"Could not access st.secrets in Hugging Face Spaces: {str(e)}. Falling back to HF_TOKEN environment variable.")
# Cache the model and tokenizer to avoid reloading
@st.cache_resource
def load_model_and_tokenizer(model_id, hf_token, is_hf_spaces):
# Define quantization config for Hugging Face Spaces (Linux)
quantization_config = None
if is_hf_spaces: # Enable quantization only on Hugging Face Spaces
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
quantization_config=quantization_config, # Use quantization on Spaces
torch_dtype=torch.float16,
low_cpu_mem_usage=True,
offload_folder="offload" if not is_hf_spaces else None, # Offload only locally
offload_state_dict=not is_hf_spaces, # Offload only locally
token=hf_token
)
tokenizer = AutoTokenizer.from_pretrained(
model_id,
token=hf_token
)
return model, tokenizer
# Load the model and tokenizer
model_id = "not0w4i5/algorizz"
try:
model, tokenizer = load_model_and_tokenizer(model_id, hf_token, is_hf_spaces)
except Exception as e:
st.error(f"Failed to load model: {str(e)}")
st.stop()
# Set pad_token_id to eos_token_id if not already set
if tokenizer.pad_token_id is None:
tokenizer.pad_token_id = tokenizer.eos_token_id
# Initialize chat history in Streamlit session state
if "messages" not in st.session_state:
st.session_state.messages = []
# Display chat history
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
# Accept user input
if prompt := st.chat_input("Enter your C++ question (e.g., 'Find the sum of two numbers')"):
# Add user message to chat history
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
# Generate C++ code
with st.chat_message("assistant"):
with st.spinner("Generating C++ code..."):
# Format the prompt for the model
formatted_prompt = f"Problem: {prompt}\nGenerate a C++ solution only (no Java or other languages):\nSolution in C++:"
inputs = tokenizer(
formatted_prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_length=512,
).to("cuda" if torch.cuda.is_available() else "cpu")
# Generate the response with tuned parameters
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
max_length=256, # Reduced for faster generation
num_return_sequences=1,
do_sample=True,
temperature=0.5, # More deterministic for faster generation
top_p=0.7, # Focus on high-probability tokens
pad_token_id=tokenizer.eos_token_id,
eos_token_id=tokenizer.eos_token_id,
)
# Decode the generated code
generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
code_start = generated_code.find("Solution in C++:") + len("Solution in C++:")
code = generated_code[code_start:].strip()
# Post-process to remove any non-C++ code
java_markers = ["Solution in Java:", "Solution in JAVA:", "public class", "import java", "int[]", "ArrayList"]
for marker in java_markers:
if marker in code:
code = code[:code.find(marker)].strip()
# Display the code
st.code(code, language="cpp")
# Add assistant message to chat history
st.session_state.messages.append({"role": "assistant", "content": code})