"""Streamlit app for the Hakim AI assistant (Rabe3/Hakim model).

Designed for CPU-only deployment on Hugging Face Spaces: the model is
loaded with float32 weights pinned to the CPU, and a demo fallback mode
is provided for when the heavy ML dependencies are unavailable.
"""

import streamlit as st
import time
import os

# Availability flags — flipped to True below only if the optional heavy
# dependencies import successfully. The rest of the app checks these flags
# instead of crashing at import time.
TORCH_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False

# Try to import required libraries with error handling; a missing package
# leaves the corresponding flag False so the UI can explain the problem.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    pass

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    pass

# Page configuration — must run before any other st.* call renders output.
st.set_page_config(
    page_title="Hakim AI Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better UI (currently empty; kept as an injection point).
st.markdown("""
""", unsafe_allow_html=True)


@st.cache_resource
def load_model_and_tokenizer():
    """Load the model and tokenizer with caching for better performance.

    Returns:
        (tokenizer, model, text_pipeline) on success, or (None, None, None)
        when a dependency is missing or the download/load fails. Cached by
        st.cache_resource so the model is loaded once per process.
    """
    if not TORCH_AVAILABLE:
        st.error("❌ PyTorch is not installed. Please check your requirements.txt file.")
        return None, None, None

    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library is not installed. Please check your requirements.txt file.")
        return None, None, None

    try:
        with st.spinner("🔄 Loading Hakim model... This may take a few minutes on first load."):
            # Load tokenizer first
            tokenizer = AutoTokenizer.from_pretrained(
                "Rabe3/Hakim",
                trust_remote_code=True,
            )

            # For CPU-only deployment, load model with specific settings
            model = AutoModelForCausalLM.from_pretrained(
                "Rabe3/Hakim",
                torch_dtype=torch.float32,   # Use float32 for CPU
                device_map="cpu",            # Force CPU usage
                trust_remote_code=True,
                low_cpu_mem_usage=True,      # Optimize for CPU memory usage
            )

            # Create pipeline optimized for CPU
            text_pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device="cpu",                # Explicitly set to CPU
                torch_dtype=torch.float32,
            )

            st.success("✅ Model loaded successfully!")
            return tokenizer, model, text_pipeline

    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        st.info("💡 Make sure the model 'Rabe3/Hakim' exists and is accessible.")
        return None, None, None


def generate_response(pipeline_obj, prompt, system_prompt, max_length=256,
                      temperature=0.7, top_p=0.9, do_sample=True):
    """Generate response using the model pipeline.

    Args:
        pipeline_obj: A transformers text-generation pipeline (or a demo
            stand-in exposing the same __call__ / .tokenizer interface).
        prompt: The user's message.
        system_prompt: System instructions prepended to the conversation.
        max_length: Maximum number of NEW tokens to generate.
        temperature, top_p, do_sample: Standard sampling parameters.

    Returns:
        The cleaned assistant reply as a string, or an error message
        prefixed with ❌ if generation fails or the model is not loaded.
    """
    if pipeline_obj is None:
        return "❌ Model not loaded. Please refresh the page."

    try:
        # Combine system prompt with user input
        full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"

        # Generate response with CPU-optimized settings
        with st.spinner("🤔 Generating response..."):
            response = pipeline_obj(
                full_prompt,
                max_new_tokens=max_length,   # Use max_new_tokens instead of max_length
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=pipeline_obj.tokenizer.eos_token_id,
                return_full_text=False,
                num_return_sequences=1,
                clean_up_tokenization_spaces=True,
            )

        # Extract generated text
        generated_text = response[0]['generated_text']

        # Clean up the response: keep only text after the last "Assistant:"
        if "Assistant:" in generated_text:
            generated_text = generated_text.split("Assistant:")[-1].strip()

        # Remove any remaining prompt artifacts (echoed turn markers)
        lines = generated_text.split('\n')
        cleaned_lines = []
        for line in lines:
            line = line.strip()
            if line and not line.startswith("User:") and not line.startswith("Human:"):
                cleaned_lines.append(line)

        return '\n'.join(cleaned_lines) if cleaned_lines else "I apologize, but I couldn't generate a proper response."

    except Exception as e:
        return f"❌ Error generating response: {str(e)}"


def create_fallback_demo():
    """Create a simple demo mode when model loading fails.

    Returns a (tokenizer, model, pipeline) triple mirroring the shape of
    load_model_and_tokenizer(), where the pipeline produces canned replies
    (model slot is None).
    """
    st.warning("🔧 Model loading failed. Running in demo mode with simulated responses.")

    class DemoTokenizer:
        # Minimal stand-in so generate_response can read .eos_token_id.
        def __init__(self):
            self.eos_token_id = 2

    class DemoPipeline:
        # Mimics the transformers pipeline call signature and return shape.
        def __init__(self):
            self.tokenizer = DemoTokenizer()

        def __call__(self, prompt, **kwargs):
            # Simulate response generation
            time.sleep(1)  # Simulate processing time

            # Simple demo responses based on input
            if any(arabic_word in prompt for arabic_word in ['مرحبا', 'السلام', 'أهلا']):
                response = "مرحبا بك! أنا حكيم، مساعدك الذكي. كيف يمكنني مساعدتك اليوم؟"
            elif 'hello' in prompt.lower() or 'hi' in prompt.lower():
                response = "Hello! I'm Hakim, your AI assistant. How can I help you today?"
            elif 'what' in prompt.lower() and 'ai' in prompt.lower():
                response = "Artificial Intelligence (AI) refers to computer systems that can perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
            else:
                response = "I understand your question. This is a demo response since the actual model couldn't be loaded. In a real deployment, I would provide a more detailed and contextual answer based on the Rabe3/Hakim model."

            return [{'generated_text': response}]

    st.info("✅ Demo mode initialized!")
    return DemoTokenizer(), None, DemoPipeline()


def show_requirements_info():
    """Show information about missing requirements."""
    st.error("🚫 Required libraries are missing!")
    st.markdown("""
Your Hugging Face Space needs the following libraries. Make sure your requirements.txt contains:

```
streamlit
torch --extra-index-url https://download.pytorch.org/whl/cpu
transformers
accelerate
sentencepiece
protobuf
```
""")


# Footer. NOTE(review): these two lines appeared as orphaned text at the end
# of the mangled source — placement as a module-level footer is reconstructed;
# confirm against the original deployment.
st.markdown("""
Powered by Rabe3/Hakim model from Hugging Face 🤗

Running on CPU - Optimized for Hugging Face Spaces
""")