# hakim-v1 / src/streamlit_app.py
# Streamlit chat front-end for the Rabe3/Hakim language model.
# Last change: "Update src/streamlit_app.py" by Rabe3 (commit b52e616, verified).
import streamlit as st
import time
import os

# Initialize availability flags.
# These module-level booleans record whether the optional ML dependencies
# imported successfully; the UI consults them to choose between the real
# model path and the "missing requirements" error screen.
TORCH_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False

# Try to import required libraries with error handling: the app must still
# render (and explain what is missing) when torch/transformers are absent.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    pass
try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    pass
# Page configuration: tab title/icon, wide layout, sidebar open by default.
# NOTE(review): st.set_page_config is documented to require being the first
# Streamlit command in the script — keep it ahead of any other st.* call.
st.set_page_config(
    page_title="Hakim AI Assistant",
    page_icon="πŸ€–",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for better UI
st.markdown("""
<style>
.main-header {
text-align: center;
color: #2E86AB;
font-size: 2.5rem;
margin-bottom: 2rem;
}
.chat-message {
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
}
.user-message {
background-color: #E3F2FD;
border-left: 5px solid #2196F3;
}
.assistant-message {
background-color: #F1F8E9;
border-left: 5px solid #4CAF50;
}
.stTextArea textarea {
border-radius: 10px;
}
.error-box {
background-color: #ffebee;
border: 1px solid #f44336;
border-radius: 5px;
padding: 1rem;
margin: 1rem 0;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model_and_tokenizer():
    """Fetch and cache the Hakim tokenizer, model, and generation pipeline.

    Decorated with st.cache_resource so the expensive download/initialization
    runs once per server process rather than on every Streamlit rerun.

    Returns:
        (tokenizer, model, pipeline) on success, or (None, None, None) when a
        dependency is missing or loading fails.
    """
    # Guard clauses: report exactly which dependency is absent.
    if not TORCH_AVAILABLE:
        st.error("❌ PyTorch is not installed. Please check your requirements.txt file.")
        return None, None, None
    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library is not installed. Please check your requirements.txt file.")
        return None, None, None

    repo_id = "Rabe3/Hakim"
    try:
        with st.spinner("πŸ”„ Loading Hakim model... This may take a few minutes on first load."):
            # Tokenizer first; it is cheap compared to the model weights.
            tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

            # CPU-only deployment: float32 weights (no fp16 kernels on CPU),
            # explicit CPU placement, and reduced peak RAM while loading.
            model = AutoModelForCausalLM.from_pretrained(
                repo_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

            # Wrap both in a text-generation pipeline pinned to the CPU.
            text_pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device="cpu",
                torch_dtype=torch.float32,
            )
        st.success("βœ… Model loaded successfully!")
        return tokenizer, model, text_pipeline
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        st.info("πŸ’‘ Make sure the model 'Rabe3/Hakim' exists and is accessible.")
        return None, None, None
def generate_response(pipeline_obj, prompt, system_prompt, max_length=256, temperature=0.7, top_p=0.9, do_sample=True):
    """Run one chat turn through the text-generation pipeline.

    Args:
        pipeline_obj: transformers text-generation pipeline, or None if the
            model failed to load.
        prompt: the user's message.
        system_prompt: instruction text prepended ahead of the exchange.
        max_length: maximum number of NEW tokens to generate.
        temperature: sampling temperature.
        top_p: nucleus-sampling threshold.
        do_sample: whether sampling is enabled at all.

    Returns:
        The cleaned assistant reply, or a "❌ ..." error string on failure.
    """
    if pipeline_obj is None:
        return "❌ Model not loaded. Please refresh the page."

    try:
        # Simple chat template: system text, then one User/Assistant exchange.
        full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"

        with st.spinner("πŸ€” Generating response..."):
            outputs = pipeline_obj(
                full_prompt,
                max_new_tokens=max_length,  # bounds only the generated part, not the prompt
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=pipeline_obj.tokenizer.eos_token_id,
                return_full_text=False,
                num_return_sequences=1,
                clean_up_tokenization_spaces=True,
            )

        text = outputs[0]['generated_text']

        # If the model echoed the template, keep only the final assistant turn.
        if "Assistant:" in text:
            text = text.rpartition("Assistant:")[2].strip()

        # Drop blank lines and any hallucinated dialogue-turn prefixes.
        kept = [
            stripped
            for stripped in (raw.strip() for raw in text.split('\n'))
            if stripped and not stripped.startswith(("User:", "Human:"))
        ]
        if not kept:
            return "I apologize, but I couldn't generate a proper response."
        return '\n'.join(kept)
    except Exception as e:
        return f"❌ Error generating response: {str(e)}"
def create_fallback_demo():
    """Build stand-in tokenizer/pipeline objects that fake model responses.

    Returns:
        A (tokenizer, model, pipeline) triple shaped like the real loader's
        result, where model is None and the pipeline yields canned replies.
    """
    st.warning("πŸ”§ Model loading failed. Running in demo mode with simulated responses.")

    class DemoTokenizer:
        # Exposes the one attribute the generation code reads (eos_token_id).
        def __init__(self):
            self.eos_token_id = 2

    class DemoPipeline:
        # Callable stand-in for a transformers text-generation pipeline.
        def __init__(self):
            self.tokenizer = DemoTokenizer()

        def __call__(self, prompt, **kwargs):
            time.sleep(1)  # simulate processing latency
            lowered = prompt.lower()
            # Pick a canned reply by simple keyword matching on the input.
            if any(greeting in prompt for greeting in ('Ω…Ψ±Ψ­Ψ¨Ψ§', 'Ψ§Ω„Ψ³Ω„Ψ§Ω…', 'Ψ£Ω‡Ω„Ψ§')):
                reply = "Ω…Ψ±Ψ­Ψ¨Ψ§ Ψ¨Ωƒ! Ψ£Ω†Ψ§ Ψ­ΩƒΩŠΩ…ΨŒ Ω…Ψ³Ψ§ΨΉΨ―Ωƒ Ψ§Ω„Ψ°ΩƒΩŠ. ΩƒΩŠΩ ΩŠΩ…ΩƒΩ†Ω†ΩŠ Ω…Ψ³Ψ§ΨΉΨ―ΨͺΩƒ Ψ§Ω„ΩŠΩˆΩ…ΨŸ"
            elif 'hello' in lowered or 'hi' in lowered:
                reply = "Hello! I'm Hakim, your AI assistant. How can I help you today?"
            elif 'what' in lowered and 'ai' in lowered:
                reply = "Artificial Intelligence (AI) refers to computer systems that can perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
            else:
                reply = "I understand your question. This is a demo response since the actual model couldn't be loaded. In a real deployment, I would provide a more detailed and contextual answer based on the Rabe3/Hakim model."
            return [{'generated_text': reply}]

    st.info("βœ… Demo mode initialized!")
    return DemoTokenizer(), None, DemoPipeline()
def show_requirements_info():
    """Render an error panel listing the pip packages this Space requires."""
    st.error("🚫 Required libraries are missing!")
    # One package spec per line, joined into the <pre> block below.
    required_packages = "\n".join([
        "streamlit",
        "torch --extra-index-url https://download.pytorch.org/whl/cpu",
        "transformers",
        "accelerate",
        "sentencepiece",
        "protobuf",
    ])
    # Raw HTML so the styled .error-box CSS class applies.
    st.markdown(
        f"""
    <div class="error-box">
        <h3>Missing Dependencies</h3>
        <p>Your Hugging Face Space needs the following libraries. Make sure your <code>requirements.txt</code> contains:</p>
        <pre>
{required_packages}
        </pre>
    </div>
    """,
        unsafe_allow_html=True,
    )
def main():
    """Render the Hakim chat app: sidebar configuration, history, and input.

    Streamlit re-executes this function top-to-bottom on every interaction;
    all cross-run state lives in st.session_state ("messages", "user_input",
    and the internal "queued_input" / "_applied_example" keys).
    """
    # Header
    st.markdown('<h1 class="main-header">πŸ€– Hakim AI Assistant</h1>', unsafe_allow_html=True)

    # Bail out early with setup instructions if the ML stack is missing.
    if not TORCH_AVAILABLE or not TRANSFORMERS_AVAILABLE:
        show_requirements_info()
        return

    # Load model (cached across reruns by st.cache_resource).
    tokenizer, model, pipeline_obj = load_model_and_tokenizer()
    if pipeline_obj is None:
        st.error("❌ Failed to load the model. Please check the logs and try again.")
        return

    # Sidebar for configuration
    with st.sidebar:
        st.header("βš™οΈ Configuration")

        # System prompt controlling the assistant's persona.
        system_prompt = st.text_area(
            "System Prompt",
            value="You are Hakim, a helpful AI assistant. You provide accurate, helpful, and informative responses. You communicate clearly and professionally in a concise manner.",
            height=150,
            help="This prompt sets the behavior and personality of the AI assistant."
        )
        st.divider()

        # Generation parameters (defaults kept modest for CPU inference).
        st.subheader("Generation Parameters")
        max_length = st.slider(
            "Max New Tokens",
            min_value=32,
            max_value=512,
            value=128,
            step=16,
            help="Maximum number of new tokens to generate (lower values are faster on CPU)"
        )
        temperature = st.slider(
            "Temperature",
            min_value=0.1,
            max_value=1.5,
            value=0.7,
            step=0.1,
            help="Controls randomness (lower = more focused, higher = more creative)"
        )
        top_p = st.slider(
            "Top P",
            min_value=0.5,
            max_value=1.0,
            value=0.9,
            step=0.05,
            help="Controls diversity via nucleus sampling"
        )
        do_sample = st.checkbox(
            "Enable Sampling",
            value=True,
            help="Enable sampling for more diverse responses"
        )
        st.divider()

        # Model info
        st.subheader("ℹ️ Model Information")
        st.info("**Model:** Rabe3/Hakim\n**Type:** Causal Language Model\n**Device:** CPU\n**Framework:** Transformers")

        # Performance tip
        st.warning("πŸ’‘ **CPU Performance Tip:** Lower token limits will generate responses faster.")

        # Clear chat button
        if st.button("πŸ—‘οΈ Clear Chat History", type="secondary", use_container_width=True):
            if 'messages' in st.session_state:
                st.session_state.messages = []
            st.rerun()

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # BUGFIX: a widget-backed session_state key ("user_input") cannot be
    # assigned after its widget is instantiated in the same run — Streamlit
    # raises StreamlitAPIException. Writes to the input box are therefore
    # queued under "queued_input" and applied here, BEFORE the text_area
    # below is created.
    if "queued_input" in st.session_state:
        st.session_state.user_input = st.session_state.pop("queued_input")

    # Main chat interface
    st.header("πŸ’¬ Chat Interface")

    # Display chat history
    if st.session_state.messages:
        for message in st.session_state.messages:
            if message["role"] == "user":
                st.markdown(f'<div class="chat-message user-message"><strong>You:</strong> {message["content"]}</div>', unsafe_allow_html=True)
            else:
                st.markdown(f'<div class="chat-message assistant-message"><strong>Hakim:</strong> {message["content"]}</div>', unsafe_allow_html=True)
    else:
        st.info("πŸ‘‹ Welcome! Start a conversation by typing your message below.")

    # Chat input
    user_input = st.text_area(
        "Enter your message:",
        height=100,
        placeholder="Type your message here...",
        key="user_input"
    )

    col1, col2 = st.columns([1, 2])
    with col1:
        send_button = st.button("πŸ“€ Send", type="primary", disabled=not user_input.strip())
    with col2:
        # BUGFIX: the examples selectbox used to be nested inside an
        # `if st.button(...)` branch, so it vanished on the next rerun and a
        # selection could never take effect. It now renders unconditionally,
        # and a newly chosen example is queued into the input box.
        examples = [
            "Ω…Ψ±Ψ­Ψ¨Ψ§ΨŒ ΩƒΩŠΩ ΩŠΩ…ΩƒΩ†Ωƒ Ω…Ψ³Ψ§ΨΉΨ―Ψͺي؟",  # Arabic: Hello, how can you help me?
            "What is artificial intelligence?",
            "Can you explain machine learning?",
            "Tell me about renewable energy"
        ]
        selected_example = st.selectbox("Choose an example:", [""] + examples, key="example_select")
        # Only queue when the selection changed, so manual edits to the input
        # box are not clobbered on subsequent reruns.
        if selected_example and st.session_state.get("_applied_example") != selected_example:
            st.session_state._applied_example = selected_example
            st.session_state.queued_input = selected_example
            st.rerun()

    # Process user input
    if send_button and user_input.strip():
        # Add user message to history
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Generate response
        response = generate_response(
            pipeline_obj=pipeline_obj,
            prompt=user_input,
            system_prompt=system_prompt,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=do_sample
        )

        # Add assistant response to history
        st.session_state.messages.append({"role": "assistant", "content": response})

        # BUGFIX: clearing st.session_state.user_input directly here would
        # raise (widget already instantiated); queue the clear for the next
        # run instead — it is applied above, before the widget is created.
        st.session_state.queued_input = ""
        st.rerun()

    # Footer
    st.divider()
    st.markdown(
        """
        <div style='text-align: center; color: #666; margin-top: 2rem;'>
            <p>Powered by <strong>Rabe3/Hakim</strong> model from Hugging Face πŸ€—</p>
            <p><em>Running on CPU - Optimized for Hugging Face Spaces</em></p>
        </div>
        """,
        unsafe_allow_html=True
    )
# Script entry point; Streamlit runs this module directly on each rerun.
if __name__ == "__main__":
    main()