Spaces:
Sleeping
Sleeping
import os

import streamlit as st
import torch
from dotenv import load_dotenv
# NOTE: BitsAndBytesConfig is exported by `transformers`, not by the
# `bitsandbytes` package itself — importing it from bitsandbytes raises
# ImportError at startup (bitsandbytes is only the quantization backend).
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load environment variables from a local .env file (no-op on Spaces,
# where secrets arrive via st.secrets / real env vars instead).
load_dotenv()
# Streamlit page configuration
st.set_page_config(page_title="Algorizz C++ Solver", page_icon="💻")
st.title("Algorizz C++ Solver")
st.markdown("Paste a C++ question, and I'll generate the C++ code solution!")

# Resolve the Hugging Face access token. Locally it comes from the
# environment (possibly via .env); on Spaces, st.secrets takes precedence.
hf_token = os.getenv("HF_TOKEN")
# NOTE(review): HF_HOME can also be set on a developer machine — presumably
# this is meant to detect Hugging Face Spaces; confirm whether SPACE_ID
# would be the stricter signal.
is_hf_spaces = os.getenv("HF_HOME") is not None
try:
    # Short-circuit keeps st.secrets untouched when not running on Spaces.
    if is_hf_spaces and "HF_TOKEN" in st.secrets:
        hf_token = st.secrets["HF_TOKEN"]
except Exception as e:
    st.warning(f"Could not access st.secrets in Hugging Face Spaces: {str(e)}. Falling back to HF_TOKEN environment variable.")
# Cache the model and tokenizer so they are loaded once per process —
# without this decorator the multi-GB model is reloaded on every
# Streamlit rerun (every user interaction), despite the original intent.
@st.cache_resource
def load_model_and_tokenizer(model_id, hf_token, is_hf_spaces):
    """Load the causal LM and tokenizer for ``model_id`` from the HF Hub.

    Args:
        model_id: Hugging Face Hub repository id of the model.
        hf_token: access token for gated/private repos (may be None).
        is_hf_spaces: when True, load weights 4-bit quantized (NF4 via
            bitsandbytes, CUDA/Linux only); when False, load fp16 and
            offload to a local ``offload`` folder instead.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Quantize only on Spaces, where the bitsandbytes backend is available;
    # locally we fall back to full fp16 with disk offload.
    quantization_config = None
    if is_hf_spaces:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=False,
        )
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        # Disk offload only makes sense locally; on Spaces the 4-bit
        # weights are expected to fit in device memory.
        offload_folder=None if is_hf_spaces else "offload",
        offload_state_dict=not is_hf_spaces,
        token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    return model, tokenizer
# Load the model and tokenizer; a failure here is fatal for the app,
# so surface the error and halt the script run.
model_id = "not0w4i5/algorizz"
try:
    model, tokenizer = load_model_and_tokenizer(model_id, hf_token, is_hf_spaces)
except Exception as e:
    st.error(f"Failed to load model: {str(e)}")
    st.stop()

# Some causal LMs ship without a pad token; fall back to EOS for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Chat history lives in session state so it survives Streamlit reruns.
st.session_state.setdefault("messages", [])
# Re-render the full chat history on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("Enter your C++ question (e.g., 'Find the sum of two numbers')"):
    # Record and echo the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate C++ code
    with st.chat_message("assistant"):
        with st.spinner("Generating C++ code..."):
            # Prompt template the model was tuned on; the trailing marker
            # is also used below to strip the echoed prompt from the output.
            formatted_prompt = f"Problem: {prompt}\nGenerate a C++ solution only (no Java or other languages):\nSolution in C++:"
            inputs = tokenizer(
                formatted_prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            ).to("cuda" if torch.cuda.is_available() else "cpu")
            # BUGFIX: the original used max_length=256, which counts the
            # prompt tokens too — a prompt longer than 256 tokens (the
            # tokenizer allows up to 512) would yield no new tokens at all.
            # max_new_tokens bounds only the generated continuation.
            with torch.no_grad():  # inference only — skip autograd bookkeeping
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=256,  # kept small for faster generation
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.5,  # more deterministic for faster generation
                    top_p=0.7,  # focus on high-probability tokens
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )
            # Decode, then strip everything up to (and including) the
            # prompt's "Solution in C++:" marker.
            generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
            marker = "Solution in C++:"
            marker_at = generated_code.find(marker)
            # Guard against the marker having been truncated out of the
            # decoded text; find() == -1 would otherwise slice from a
            # bogus offset and silently drop most of the output.
            if marker_at != -1:
                code = generated_code[marker_at + len(marker):].strip()
            else:
                code = generated_code.strip()
            # Post-process: cut at the first sign of non-C++ (Java) output.
            java_markers = ["Solution in Java:", "Solution in JAVA:", "public class", "import java", "int[]", "ArrayList"]
            for marker in java_markers:
                if marker in code:
                    code = code[:code.find(marker)].strip()
            # Display the code and persist the assistant turn.
            st.code(code, language="cpp")
            st.session_state.messages.append({"role": "assistant", "content": code})