File size: 12,636 Bytes
005a2cf
094e504
 
005a2cf
0cef53c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
094e504
 
 
 
 
 
 
005a2cf
094e504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cef53c
 
 
 
 
 
 
094e504
 
005a2cf
094e504
 
 
0cef53c
 
 
 
 
 
badbfd6
 
094e504
0cef53c
 
 
 
 
 
094e504
0cef53c
094e504
 
0cef53c
 
 
 
094e504
 
0cef53c
094e504
 
 
 
0cef53c
 
094e504
 
0cef53c
094e504
0cef53c
094e504
0cef53c
 
094e504
005a2cf
0cef53c
094e504
0cef53c
 
 
094e504
 
 
 
0cef53c
 
 
094e504
0cef53c
094e504
 
 
0cef53c
094e504
0cef53c
 
094e504
 
 
 
 
0cef53c
094e504
 
 
0cef53c
 
 
 
 
 
 
 
 
094e504
 
0cef53c
 
b52e616
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cef53c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
005a2cf
094e504
 
 
 
0cef53c
 
 
 
 
094e504
0cef53c
094e504
0cef53c
 
094e504
 
 
 
 
 
 
 
 
0cef53c
094e504
 
 
 
 
 
0cef53c
094e504
 
 
0cef53c
 
 
 
 
 
094e504
 
 
 
 
0cef53c
094e504
 
 
 
 
 
 
0cef53c
094e504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cef53c
 
 
 
094e504
 
0cef53c
094e504
 
 
 
 
 
 
 
 
 
 
 
0cef53c
 
 
 
 
 
 
 
094e504
 
 
 
 
 
 
 
 
0cef53c
094e504
 
0cef53c
094e504
 
 
 
0cef53c
094e504
0cef53c
 
094e504
0cef53c
 
 
094e504
 
 
 
 
 
 
 
0cef53c
094e504
 
 
 
 
 
 
 
 
 
 
0cef53c
 
094e504
 
 
 
 
 
 
 
0cef53c
094e504
 
 
 
005a2cf
094e504
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
import streamlit as st
import time
import os

# Initialize availability flags
# These are probed below and checked again in main()/load_model_and_tokenizer()
# so the app can degrade gracefully instead of crashing on a missing wheel.
TORCH_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False

# Try to import required libraries with error handling
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    pass

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    pass

# Page configuration
# NOTE: set_page_config must be the first Streamlit command executed in the
# script, so it stays at module level ahead of any other st.* call.
st.set_page_config(
    page_title="Hakim AI Assistant",
    page_icon="๐Ÿค–",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better UI
# Injected once at module level; the classes defined here (main-header,
# chat-message, user-message, assistant-message, error-box) are referenced
# by the raw-HTML st.markdown calls further down in the file.
st.markdown("""
<style>
    .main-header {
        text-align: center;
        color: #2E86AB;
        font-size: 2.5rem;
        margin-bottom: 2rem;
    }
    .chat-message {
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
    }
    .user-message {
        background-color: #E3F2FD;
        border-left: 5px solid #2196F3;
    }
    .assistant-message {
        background-color: #F1F8E9;
        border-left: 5px solid #4CAF50;
    }
    .stTextArea textarea {
        border-radius: 10px;
    }
    .error-box {
        background-color: #ffebee;
        border: 1px solid #f44336;
        border-radius: 5px;
        padding: 1rem;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)

@st.cache_resource
def load_model_and_tokenizer():
    """Download, initialise, and cache the Hakim model stack.

    Decorated with st.cache_resource so the expensive download and
    initialisation run once per server process and are shared across reruns.

    Returns:
        tuple: (tokenizer, model, pipeline) on success, or (None, None, None)
        when a required library is missing or loading fails.
    """
    # Guard clauses: fail fast with a visible error if a dependency is absent.
    if not TORCH_AVAILABLE:
        st.error("โŒ PyTorch is not installed. Please check your requirements.txt file.")
        return None, None, None
    if not TRANSFORMERS_AVAILABLE:
        st.error("โŒ Transformers library is not installed. Please check your requirements.txt file.")
        return None, None, None

    try:
        with st.spinner("๐Ÿ”„ Loading Hakim model... This may take a few minutes on first load."):
            repo_id = "Rabe3/Hakim"

            # Tokenizer first: it is tiny and fails fast on a bad repo id.
            tok = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

            # CPU-only deployment settings: float32 weights, forced CPU
            # placement, and low_cpu_mem_usage to cap peak RAM while loading.
            lm = AutoModelForCausalLM.from_pretrained(
                repo_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

            # Bundle everything into a ready-to-call generation pipeline.
            gen = pipeline(
                "text-generation",
                model=lm,
                tokenizer=tok,
                device="cpu",
                torch_dtype=torch.float32,
            )

            st.success("โœ… Model loaded successfully!")
            return tok, lm, gen

    except Exception as e:
        st.error(f"โŒ Error loading model: {str(e)}")
        st.info("๐Ÿ’ก Make sure the model 'Rabe3/Hakim' exists and is accessible.")
        return None, None, None

def generate_response(pipeline_obj, prompt, system_prompt, max_length=256, temperature=0.7, top_p=0.9, do_sample=True):
    """Run one chat turn through the text-generation pipeline.

    Args:
        pipeline_obj: transformers text-generation pipeline (or None).
        prompt: the user's message.
        system_prompt: instruction text prepended to every request.
        max_length: maximum number of NEW tokens to generate.
        temperature: sampling temperature.
        top_p: nucleus-sampling cutoff.
        do_sample: whether to sample (vs. greedy decoding).

    Returns:
        str: the cleaned assistant reply, or a user-facing error string.
    """
    if pipeline_obj is None:
        return "โŒ Model not loaded. Please refresh the page."

    try:
        # Frame the exchange so the model sees system context + user turn.
        framed = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"

        with st.spinner("๐Ÿค” Generating response..."):
            out = pipeline_obj(
                framed,
                max_new_tokens=max_length,  # bound NEW tokens, not total length
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=pipeline_obj.tokenizer.eos_token_id,
                return_full_text=False,
                num_return_sequences=1,
                clean_up_tokenization_spaces=True,
            )

        text = out[0]['generated_text']

        # Keep only what follows the final "Assistant:" marker, if echoed.
        if "Assistant:" in text:
            text = text.split("Assistant:")[-1].strip()

        # Drop blank lines and any echoed dialogue-role lines.
        kept = [
            s
            for s in (ln.strip() for ln in text.split('\n'))
            if s and not s.startswith("User:") and not s.startswith("Human:")
        ]

        if kept:
            return '\n'.join(kept)
        return "I apologize, but I couldn't generate a proper response."

    except Exception as e:
        return f"โŒ Error generating response: {str(e)}"

def create_fallback_demo():
    """Build stand-in tokenizer/pipeline objects that fake model output.

    Used when real model loading fails so the chat UI remains usable.

    Returns:
        tuple: (stub tokenizer, None, stub pipeline) — mirrors the shape of
        load_model_and_tokenizer()'s return value.
    """
    st.warning("๐Ÿ”ง Model loading failed. Running in demo mode with simulated responses.")

    class _StubTokenizer:
        def __init__(self):
            # Only attribute the generation code reads.
            self.eos_token_id = 2

    class _StubPipeline:
        def __init__(self):
            self.tokenizer = _StubTokenizer()

        def __call__(self, prompt, **kwargs):
            time.sleep(1)  # mimic real inference latency

            # Canned replies keyed off simple keyword checks on the input.
            lowered = prompt.lower()
            if any(word in prompt for word in ['ู…ุฑุญุจุง', 'ุงู„ุณู„ุงู…', 'ุฃู‡ู„ุง']):
                reply = "ู…ุฑุญุจุง ุจูƒ! ุฃู†ุง ุญูƒูŠู…ุŒ ู…ุณุงุนุฏูƒ ุงู„ุฐูƒูŠ. ูƒูŠู ูŠู…ูƒู†ู†ูŠ ู…ุณุงุนุฏุชูƒ ุงู„ูŠูˆู…ุŸ"
            elif 'hello' in lowered or 'hi' in lowered:
                reply = "Hello! I'm Hakim, your AI assistant. How can I help you today?"
            elif 'what' in lowered and 'ai' in lowered:
                reply = "Artificial Intelligence (AI) refers to computer systems that can perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
            else:
                reply = "I understand your question. This is a demo response since the actual model couldn't be loaded. In a real deployment, I would provide a more detailed and contextual answer based on the Rabe3/Hakim model."

            return [{'generated_text': reply}]

    st.info("โœ… Demo mode initialized!")
    return _StubTokenizer(), None, _StubPipeline()

def show_requirements_info():
    """Render an error panel listing the pip requirements this Space needs."""
    st.error("๐Ÿšซ Required libraries are missing!")

    # HTML payload kept verbatim; styled by the .error-box CSS class injected
    # at module level.
    deps_html = """
    <div class="error-box">
    <h3>Missing Dependencies</h3>
    <p>Your Hugging Face Space needs the following libraries. Make sure your <code>requirements.txt</code> contains:</p>
    <pre>
streamlit
torch --extra-index-url https://download.pytorch.org/whl/cpu
transformers
accelerate
sentencepiece
protobuf
    </pre>
    </div>
    """
    st.markdown(deps_html, unsafe_allow_html=True)

def main():
    """Top-level Streamlit page: sidebar config, chat history, and input box.

    Bug fixes vs. the previous version:
    - Assigning to ``st.session_state.user_input`` AFTER the text_area widget
      with ``key="user_input"`` was instantiated raises
      ``StreamlitAPIException``. Clearing is now deferred through a
      ``_clear_input`` flag that is applied on the next rerun, BEFORE the
      widget is created.
    - The example picker was nested inside an ``st.button`` branch, so the
      selectbox vanished on the very next rerun (button state is ephemeral)
      and its post-instantiation write to the widget key could never take
      effect. It is now always rendered and uses an ``on_change`` callback;
      Streamlit runs callbacks before widgets are instantiated, so the
      callback may safely populate the input key.
    """
    # Header
    st.markdown('<h1 class="main-header">๐Ÿค– Hakim AI Assistant</h1>', unsafe_allow_html=True)

    # Check if required libraries are available
    if not TORCH_AVAILABLE or not TRANSFORMERS_AVAILABLE:
        show_requirements_info()
        return

    # Load model (cached by st.cache_resource; cheap after the first call)
    tokenizer, model, pipeline_obj = load_model_and_tokenizer()

    if pipeline_obj is None:
        st.error("โŒ Failed to load the model. Please check the logs and try again.")
        return

    # Sidebar for configuration
    with st.sidebar:
        st.header("โš™๏ธ Configuration")

        # System prompt
        system_prompt = st.text_area(
            "System Prompt",
            value="You are Hakim, a helpful AI assistant. You provide accurate, helpful, and informative responses. You communicate clearly and professionally in a concise manner.",
            height=150,
            help="This prompt sets the behavior and personality of the AI assistant."
        )

        st.divider()

        # Generation parameters (optimized for CPU)
        st.subheader("Generation Parameters")

        max_length = st.slider(
            "Max New Tokens",
            min_value=32,
            max_value=512,
            value=128,
            step=16,
            help="Maximum number of new tokens to generate (lower values are faster on CPU)"
        )

        temperature = st.slider(
            "Temperature",
            min_value=0.1,
            max_value=1.5,
            value=0.7,
            step=0.1,
            help="Controls randomness (lower = more focused, higher = more creative)"
        )

        top_p = st.slider(
            "Top P",
            min_value=0.5,
            max_value=1.0,
            value=0.9,
            step=0.05,
            help="Controls diversity via nucleus sampling"
        )

        do_sample = st.checkbox(
            "Enable Sampling",
            value=True,
            help="Enable sampling for more diverse responses"
        )

        st.divider()

        # Model info
        st.subheader("โ„น๏ธ Model Information")
        st.info("**Model:** Rabe3/Hakim\n**Type:** Causal Language Model\n**Device:** CPU\n**Framework:** Transformers")

        # Performance tip
        st.warning("๐Ÿ’ก **CPU Performance Tip:** Lower token limits will generate responses faster.")

        # Clear chat button
        if st.button("๐Ÿ—‘๏ธ Clear Chat History", type="secondary", use_container_width=True):
            if 'messages' in st.session_state:
                st.session_state.messages = []
            st.rerun()

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Apply a deferred input clear BEFORE the text_area is instantiated --
    # mutating a widget-keyed session_state entry afterwards would raise.
    if st.session_state.pop("_clear_input", False):
        st.session_state["user_input"] = ""

    # Main chat interface
    st.header("๐Ÿ’ฌ Chat Interface")

    # Display chat history
    if st.session_state.messages:
        for message in st.session_state.messages:
            if message["role"] == "user":
                st.markdown(f'<div class="chat-message user-message"><strong>You:</strong> {message["content"]}</div>', unsafe_allow_html=True)
            else:
                st.markdown(f'<div class="chat-message assistant-message"><strong>Hakim:</strong> {message["content"]}</div>', unsafe_allow_html=True)
    else:
        st.info("๐Ÿ‘‹ Welcome! Start a conversation by typing your message below.")

    # Chat input
    user_input = st.text_area(
        "Enter your message:",
        height=100,
        placeholder="Type your message here...",
        key="user_input"
    )

    col1, col2 = st.columns([1, 2])

    with col1:
        send_button = st.button("๐Ÿ“ค Send", type="primary", disabled=not user_input.strip())

    with col2:
        examples = [
            "ู…ุฑุญุจุงุŒ ูƒูŠู ูŠู…ูƒู†ูƒ ู…ุณุงุนุฏุชูŠุŸ",  # Arabic: Hello, how can you help me?
            "What is artificial intelligence?",
            "Can you explain machine learning?",
            "Tell me about renewable energy"
        ]

        def _apply_example():
            # Runs before widgets are instantiated on the next script run,
            # so writing to the input widget's key here is safe.
            choice = st.session_state.get("example_select", "")
            if choice:
                st.session_state["user_input"] = choice

        st.selectbox(
            "๐Ÿ’ก Example Questions",
            [""] + examples,
            key="example_select",
            on_change=_apply_example,
            help="Pick an example to copy it into the message box."
        )

    # Process user input
    if send_button and user_input.strip():
        # Add user message to history
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Generate response
        response = generate_response(
            pipeline_obj=pipeline_obj,
            prompt=user_input,
            system_prompt=system_prompt,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=do_sample
        )

        # Add assistant response to history
        st.session_state.messages.append({"role": "assistant", "content": response})

        # Defer clearing the input box to the next rerun (see docstring),
        # then rerun so the new messages render immediately.
        st.session_state["_clear_input"] = True
        st.rerun()

    # Footer
    st.divider()
    st.markdown(
        """
        <div style='text-align: center; color: #666; margin-top: 2rem;'>
            <p>Powered by <strong>Rabe3/Hakim</strong> model from Hugging Face ๐Ÿค—</p>
            <p><em>Running on CPU - Optimized for Hugging Face Spaces</em></p>
        </div>
        """,
        unsafe_allow_html=True
    )

# Script entry point (Streamlit executes the module top-to-bottom; this guard
# also allows importing the module without running the app).
if __name__ == "__main__":
    main()