import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import login
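
# Assumed dependencies (the Space's requirements.txt is not shown here):
#   torch, transformers, peft, gradio, huggingface_hub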

# Read the Hugging Face token once; the gated Mistral base model requires it
HF_TOKEN = os.environ.get("HF_AUTH_TOKEN")

# Log in to the Hugging Face Hub (skipped when no token is set)
if HF_TOKEN:
    login(token=HF_TOKEN)
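
# Note: on a Hugging Face Space, HF_AUTH_TOKEN would typically be configured as a
# repository secret; for a local run, an environment variable works, e.g.:
#   export HF_AUTH_TOKEN=hf_...   # your own token (placeholder shown)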

# Load the model and tokenizer
def load_model():
    print("Loading model...")
    base_model_name = "mistralai/Mistral-7B-v0.1"
    adapter_model_name = "Psalms23Wave/Alkebulan-AI"

    # Use CUDA if available, otherwise fall back to CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Free up GPU memory before loading
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Load the base model
    print(f"Loading base model: {base_model_name}")
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        token=HF_TOKEN,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if device == "cuda" else None,
    )

    # Load the adapter weights on top of the base model
    try:
        print(f"Loading adapter: {adapter_model_name}")
        model = PeftModel.from_pretrained(model, adapter_model_name, token=HF_TOKEN)
        print("Adapter loaded successfully")
    except Exception as e:
        print(f"Error loading adapter: {e}")
        print("Continuing with base model only")

    # Load the tokenizer from the adapter repo so any added tokens are included
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(adapter_model_name, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Model and tokenizer loaded successfully!")
    return model, tokenizer
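
# If fp16 Mistral-7B does not fit in memory, a 4-bit load is one option. This is
# only a sketch (it assumes the optional bitsandbytes package is installed) and
# would replace the AutoModelForCausalLM.from_pretrained call above:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
#   model = AutoModelForCausalLM.from_pretrained(
#       base_model_name, token=HF_TOKEN,
#       quantization_config=quant_config, device_map="auto",
#   )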

# Define the chatbot function
def chat(message, history, language, max_tokens=100, temperature=0.7, model=None, tokenizer=None):
    """Generate a response from the chatbot."""
    try:
        # Build the prompt
        prompt = f"Language: {language}\nUser: {message}\nBot:"

        # Tokenize, truncating overly long inputs
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate the response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Keep only the text generated after the "Bot:" marker
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = response.split("Bot:", 1)[1].strip() if "Bot:" in response else response[len(prompt):].strip()

        # Clean up memory
        del inputs, outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return response
    except Exception as e:
        return f"Sorry, I encountered an error: {e}. Please try again."

# Define the translation function
def translate(text, target_language, model, tokenizer):
    """Translate text into the target language."""
    try:
        prompt = f"Translate this to {target_language}: \"{text}\"\nTranslation:"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Use a lower temperature than chat for more literal output
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.3,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Keep only the text after the "Translation:" marker
        translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
        translation = translation.split("Translation:", 1)[1].strip() if "Translation:" in translation else translation.strip()

        del inputs, outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return translation
    except Exception as e:
        return f"Sorry, I encountered an error: {e}. Please try again."

# Gradio interface with lazy loading
def create_interface():
    # Defer loading the model until the first request
    model_loaded = False
    model, tokenizer = None, None

    def load_model_if_needed():
        nonlocal model, tokenizer, model_loaded
        if not model_loaded:
            try:
                model, tokenizer = load_model()
                model_loaded = True
            except Exception as e:
                print(f"Error loading model: {e}")
                raise
        return model, tokenizer

    # Main interface function
    def chatbot_interface(message, history, language, max_tokens, temperature):
        try:
            # Load the model on first use
            model, tokenizer = load_model_if_needed()

            # Route translation requests, e.g. "translate How are you to Luganda"
            if "translate" in message.lower() and " to " in message.lower():
                # Split on the last " to " so the target language is the final word(s)
                rest = message.lower().split("translate", 1)[1]
                parts = rest.rsplit(" to ", 1)
                if len(parts) == 2 and parts[0].strip() and parts[1].strip():
                    text_to_translate = parts[0].strip()
                    target_lang = parts[1].strip()
                    translation = translate(text_to_translate, target_lang, model, tokenizer)
                    response = f"Translation to {target_lang}: {translation}"
                else:
                    response = "Please specify what to translate and the target language, e.g. 'translate Hello to Luganda'"
            else:
                # Regular chat
                response = chat(message, history, language, max_tokens, temperature, model, tokenizer)

            # Append this turn to the history
            updated_history = history.copy()
            updated_history.append((message, response))
            return updated_history, updated_history
        except Exception as e:
            error_message = f"Sorry, I encountered an error: {e}. Please try again."
            updated_history = history.copy()
            updated_history.append((message, error_message))
            return updated_history, updated_history
    # Create the Gradio interface
    with gr.Blocks(title="Alkebulan AI Chatbot") as demo:
        gr.Markdown("# Alkebulan AI Chatbot")
        gr.Markdown("Chat in Luganda, Iteso, Runyankore, Acholi, or Ateso!")
        chat_history = gr.State([])

        with gr.Row():
            with gr.Column(scale=1):
                language = gr.Dropdown(
                    label="Select Language",
                    choices=["Luganda", "Iteso", "Runyankore", "Acholi", "Ateso", "English"],
                    value="Luganda",
                )
                with gr.Accordion("Advanced Settings", open=False):
                    max_tokens_slider = gr.Slider(minimum=50, maximum=200, value=100, step=10, label="Max Tokens")
                    temperature_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature")
                gr.Markdown("""
## Examples:
- Basic chat: just type a message in the selected language
- Translate: "translate How are you to Luganda"
""")
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(label="Chat History", height=400)
                with gr.Row():
                    message = gr.Textbox(
                        label="Your Message",
                        placeholder="Type your message here...",
                        scale=8,
                    )
                    submit = gr.Button("Send", scale=1)
                    clear = gr.Button("Clear", scale=1)

        # Wire up the interactions
        submit.click(
            chatbot_interface,
            inputs=[message, chat_history, language, max_tokens_slider, temperature_slider],
            outputs=[chatbot, chat_history],
        )
        message.submit(
            chatbot_interface,
            inputs=[message, chat_history, language, max_tokens_slider, temperature_slider],
            outputs=[chatbot, chat_history],
        )
        clear.click(lambda: [], None, chatbot, queue=False)
        clear.click(lambda: [], None, chat_history, queue=False)

    return demo

# Launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)  # share=True exposes a temporary public URL