"""Streamlit chat UI for the Rabe3/Hakim causal language model.

CPU-only deployment target (Hugging Face Spaces). Degrades gracefully:
shows a requirements screen when torch/transformers are missing, and
falls back to canned demo responses when the model cannot be loaded.
"""

import os
import time

import streamlit as st

# Availability flags for the optional heavy dependencies. They are probed
# at import time so the UI can explain exactly what is missing instead of
# crashing with an ImportError.
TORCH_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    pass

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    pass

# Page configuration
st.set_page_config(
    page_title="Hakim AI Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better UI.
# NOTE(review): the stylesheet payload appears to have been lost (the
# markdown body is empty) — restore the original CSS rules here.
st.markdown("""
""", unsafe_allow_html=True)


@st.cache_resource
def load_model_and_tokenizer():
    """Load the Hakim tokenizer, model, and generation pipeline.

    Cached with st.cache_resource so the (expensive) download/load happens
    once per server process, not once per rerun.

    Returns:
        tuple: (tokenizer, model, text_pipeline) on success, or
        (None, None, None) when a dependency is missing or loading fails.
    """
    if not TORCH_AVAILABLE:
        st.error("❌ PyTorch is not installed. Please check your requirements.txt file.")
        return None, None, None
    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library is not installed. Please check your requirements.txt file.")
        return None, None, None

    try:
        with st.spinner("🔄 Loading Hakim model... This may take a few minutes on first load."):
            # Load tokenizer first
            tokenizer = AutoTokenizer.from_pretrained(
                "Rabe3/Hakim",
                trust_remote_code=True,
            )

            # For CPU-only deployment, load the model with specific settings.
            model = AutoModelForCausalLM.from_pretrained(
                "Rabe3/Hakim",
                torch_dtype=torch.float32,   # float32: CPU has no fp16 fast path
                device_map="cpu",            # force CPU usage
                trust_remote_code=True,
                low_cpu_mem_usage=True,      # stream weights to limit peak RAM
            )

            # Create a generation pipeline pinned to the CPU.
            text_pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device="cpu",
                torch_dtype=torch.float32,
            )

        st.success("✅ Model loaded successfully!")
        return tokenizer, model, text_pipeline

    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        st.info("💡 Make sure the model 'Rabe3/Hakim' exists and is accessible.")
        return None, None, None


def generate_response(pipeline_obj, prompt, system_prompt, max_length=256,
                      temperature=0.7, top_p=0.9, do_sample=True):
    """Generate one assistant reply through the text-generation pipeline.

    Args:
        pipeline_obj: transformers pipeline (or demo stand-in) to call.
        prompt: raw user message.
        system_prompt: instruction text prepended to the conversation.
        max_length: maximum number of NEW tokens to generate.
        temperature: sampling temperature.
        top_p: nucleus-sampling cutoff.
        do_sample: whether to sample (False = greedy).

    Returns:
        str: the cleaned reply, or a user-facing error string on failure.
    """
    if pipeline_obj is None:
        return "❌ Model not loaded. Please refresh the page."

    try:
        # Combine system prompt with user input in a simple chat template.
        full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"

        with st.spinner("🤔 Generating response..."):
            response = pipeline_obj(
                full_prompt,
                max_new_tokens=max_length,  # max_new_tokens, NOT max_length: caps only the reply
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=pipeline_obj.tokenizer.eos_token_id,
                return_full_text=False,
                num_return_sequences=1,
                clean_up_tokenization_spaces=True,
            )

        generated_text = response[0]['generated_text']

        # Keep only the text after the last "Assistant:" marker, in case the
        # model echoed part of the prompt despite return_full_text=False.
        if "Assistant:" in generated_text:
            generated_text = generated_text.split("Assistant:")[-1].strip()

        # Drop blank lines and hallucinated extra turns ("User:"/"Human:").
        cleaned_lines = []
        for line in generated_text.split('\n'):
            line = line.strip()
            if line and not line.startswith("User:") and not line.startswith("Human:"):
                cleaned_lines.append(line)

        return '\n'.join(cleaned_lines) if cleaned_lines else "I apologize, but I couldn't generate a proper response."

    except Exception as e:
        return f"❌ Error generating response: {str(e)}"


def create_fallback_demo():
    """Build a stand-in pipeline with canned replies for demo mode.

    Used when the real model fails to load, so the UI stays interactive.

    Returns:
        tuple: (DemoTokenizer, None, DemoPipeline) mirroring the shape of
        load_model_and_tokenizer()'s return value.
    """
    st.warning("🔧 Model loading failed. Running in demo mode with simulated responses.")

    class DemoTokenizer:
        """Minimal tokenizer stub exposing only eos_token_id."""

        def __init__(self):
            self.eos_token_id = 2

    class DemoPipeline:
        """Callable mimicking a transformers text-generation pipeline."""

        def __init__(self):
            self.tokenizer = DemoTokenizer()

        def __call__(self, prompt, **kwargs):
            # Simulate processing time so the spinner is visible.
            time.sleep(1)

            # Simple keyword-based demo responses (Arabic + English).
            if any(arabic_word in prompt for arabic_word in ['مرحبا', 'السلام', 'أهلا']):
                response = "مرحبا بك! أنا حكيم، مساعدك الذكي. كيف يمكنني مساعدتك اليوم؟"
            elif 'hello' in prompt.lower() or 'hi' in prompt.lower():
                response = "Hello! I'm Hakim, your AI assistant. How can I help you today?"
            elif 'what' in prompt.lower() and 'ai' in prompt.lower():
                response = "Artificial Intelligence (AI) refers to computer systems that can perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
            else:
                response = "I understand your question. This is a demo response since the actual model couldn't be loaded. In a real deployment, I would provide a more detailed and contextual answer based on the Rabe3/Hakim model."

            # Match the real pipeline's return shape.
            return [{'generated_text': response}]

    st.info("✅ Demo mode initialized!")
    return DemoTokenizer(), None, DemoPipeline()


def show_requirements_info():
    """Render guidance about the missing torch/transformers dependencies."""
    st.error("🚫 Required libraries are missing!")
    # NOTE(review): original HTML markup was stripped in extraction;
    # reconstructed as plain markdown with the same content.
    st.markdown("""
### Missing Dependencies

Your Hugging Face Space needs the following libraries. Make sure your requirements.txt contains:

```
streamlit
torch --extra-index-url https://download.pytorch.org/whl/cpu
transformers
accelerate
sentencepiece
protobuf
```
""", unsafe_allow_html=True)


def main():
    """Top-level Streamlit page: sidebar config + chat interface."""
    # Header. NOTE(review): original HTML was stripped; reconstructed
    # minimally — styling presumably lived in the custom-CSS block above.
    st.markdown(
        '<div class="main-header"><h1>🤖 Hakim AI Assistant</h1></div>',
        unsafe_allow_html=True,
    )

    # Hard requirement check: without torch/transformers nothing works.
    if not TORCH_AVAILABLE or not TRANSFORMERS_AVAILABLE:
        show_requirements_info()
        return

    # Load model; fall back to demo mode instead of a dead end.
    # FIX: create_fallback_demo() was previously dead code — the app just
    # returned on load failure and never used the fallback.
    tokenizer, model, pipeline_obj = load_model_and_tokenizer()
    if pipeline_obj is None:
        st.error("❌ Failed to load the model. Please check the logs and try again.")
        tokenizer, model, pipeline_obj = create_fallback_demo()

    # Sidebar for configuration
    with st.sidebar:
        st.header("⚙️ Configuration")

        system_prompt = st.text_area(
            "System Prompt",
            value="You are Hakim, a helpful AI assistant. You provide accurate, helpful, and informative responses. You communicate clearly and professionally in a concise manner.",
            height=150,
            help="This prompt sets the behavior and personality of the AI assistant.",
        )

        st.divider()

        # Generation parameters (defaults kept modest for CPU inference).
        st.subheader("Generation Parameters")
        max_length = st.slider(
            "Max New Tokens",
            min_value=32, max_value=512, value=128, step=16,
            help="Maximum number of new tokens to generate (lower values are faster on CPU)",
        )
        temperature = st.slider(
            "Temperature",
            min_value=0.1, max_value=1.5, value=0.7, step=0.1,
            help="Controls randomness (lower = more focused, higher = more creative)",
        )
        top_p = st.slider(
            "Top P",
            min_value=0.5, max_value=1.0, value=0.9, step=0.05,
            help="Controls diversity via nucleus sampling",
        )
        do_sample = st.checkbox(
            "Enable Sampling",
            value=True,
            help="Enable sampling for more diverse responses",
        )

        st.divider()

        st.subheader("ℹ️ Model Information")
        st.info("**Model:** Rabe3/Hakim\n**Type:** Causal Language Model\n**Device:** CPU\n**Framework:** Transformers")
        st.warning("💡 **CPU Performance Tip:** Lower token limits will generate responses faster.")

        if st.button("🗑️ Clear Chat History", type="secondary", use_container_width=True):
            if 'messages' in st.session_state:
                st.session_state.messages = []
            st.rerun()

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # FIX: apply any pending programmatic change to the input box BEFORE
    # the widget is instantiated. Streamlit raises StreamlitAPIException
    # when a widget's session_state key is written after the widget has
    # been created in the same run — the previous code did exactly that.
    if "pending_input" in st.session_state:
        st.session_state.user_input = st.session_state.pop("pending_input")

    # Main chat interface
    st.header("💬 Chat Interface")

    if st.session_state.messages:
        # NOTE(review): bubble HTML reconstructed; class names assumed to
        # match the (missing) custom CSS — confirm against the original.
        for message in st.session_state.messages:
            if message["role"] == "user":
                st.markdown(
                    f'<div class="user-message">You: {message["content"]}</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    f'<div class="bot-message">Hakim: {message["content"]}</div>',
                    unsafe_allow_html=True,
                )
    else:
        st.info("👋 Welcome! Start a conversation by typing your message below.")

    # Chat input
    user_input = st.text_area(
        "Enter your message:",
        height=100,
        placeholder="Type your message here...",
        key="user_input",
    )

    col1, col2 = st.columns([1, 2])
    with col1:
        send_button = st.button("📤 Send", type="primary", disabled=not user_input.strip())
    with col2:
        if st.button("💡 Example Questions"):
            examples = [
                "مرحبا، كيف يمكنك مساعدتي؟",  # Arabic: Hello, how can you help me?
                "What is artificial intelligence?",
                "Can you explain machine learning?",
                "Tell me about renewable energy",
            ]
            selected_example = st.selectbox("Choose an example:", [""] + examples, key="example_select")
            if selected_example:
                # Defer the write; applied before the widget on next run.
                st.session_state.pending_input = selected_example
                st.rerun()

    # Process user input
    if send_button and user_input.strip():
        st.session_state.messages.append({"role": "user", "content": user_input})

        response = generate_response(
            pipeline_obj=pipeline_obj,
            prompt=user_input,
            system_prompt=system_prompt,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=do_sample,
        )

        st.session_state.messages.append({"role": "assistant", "content": response})

        # Clear the input box on the next run (see pending_input above).
        st.session_state.pending_input = ""
        st.rerun()

    # Footer. NOTE(review): HTML reconstructed from stripped markup.
    st.divider()
    st.markdown(
        """
<div style="text-align: center;">
Powered by Rabe3/Hakim model from Hugging Face 🤗<br>
Running on CPU - Optimized for Hugging Face Spaces
</div>
""",
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()