|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import faiss |
|
|
import numpy as np |
|
|
import os |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Model loading: try the primary instruct model, fall back to a smaller
# conversational model if it cannot be loaded (gated repo, no GPU, no net).
# ---------------------------------------------------------------------------
print("🚀 Initializing Simba AI - First African LLM...")

# Primary generation model (Mistral 7B instruct).
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Mistral's tokenizer ships without a pad token; reuse EOS so padding-aware
    # calls and generation don't raise.
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # half precision to halve memory footprint
        device_map="auto",          # let accelerate place layers across devices
    )
    print("✅ Simba AI Model Loaded Successfully!")
except Exception as e:
    print(f"❌ Model loading error: {e}")

    # Fallback: smaller conversational model that can run on CPU.
    # NOTE: model_name is rebound so later code reflects the active model.
    model_name = "microsoft/DialoGPT-large"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_name)
    print("✅ Fallback model loaded!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Static knowledge base used for retrieval-augmented prompting.
# Each entry pairs a lookup "question" with its canonical "answer";
# questions are embedded at startup and searched by simba_search().
# ---------------------------------------------------------------------------
simba_knowledge_base = [
    # --- Python coding snippets ---
    {"question": "Python add function", "answer": "def add(a, b): return a + b"},
    {"question": "Factorial function", "answer": "def factorial(n): return 1 if n == 0 else n * factorial(n-1)"},
    {"question": "Reverse string function", "answer": "def reverse_string(s): return s[::-1]"},
    {"question": "Check even number", "answer": "def is_even(n): return n % 2 == 0"},
    {"question": "Multiply function", "answer": "def multiply(x, y): return x * y"},
    {"question": "Yoruba greeting function", "answer": "def yoruba_greeting(): return 'Báwo ni'"},

    # --- Arithmetic facts ---
    {"question": "15 + 27", "answer": "42"},
    {"question": "8 × 7", "answer": "56"},
    {"question": "100 - 45", "answer": "55"},
    {"question": "12 × 12", "answer": "144"},
    {"question": "25% of 200", "answer": "50"},

    # --- Yoruba phrases ---
    {"question": "Hello in Yoruba", "answer": "Báwo ni"},
    {"question": "Thank you in Yoruba", "answer": "Ẹ sé"},
    {"question": "How are you in Yoruba", "answer": "Ṣe daadaa ni"},
    {"question": "Good morning in Yoruba", "answer": "Ẹ káàrọ̀"},
    {"question": "Good night in Yoruba", "answer": "O dàárọ̀"},
    {"question": "Please in Yoruba", "answer": "Jọ̀wọ́"},

    # --- Swahili phrases ---
    {"question": "Hello in Swahili", "answer": "Hujambo"},
    {"question": "Thank you in Swahili", "answer": "Asante"},

    # --- Igbo phrases ---
    {"question": "Hello in Igbo", "answer": "Nnọọ"},
    {"question": "Thank you in Igbo", "answer": "Daalụ"},

    # --- Hausa phrases ---
    {"question": "Hello in Hausa", "answer": "Sannu"},
    {"question": "Thank you in Hausa", "answer": "Na gode"},

    # --- African tech & innovation ---
    {"question": "M-Pesa", "answer": "Mobile money service launched in Kenya in 2007"},
    {"question": "Andela", "answer": "Trains African software developers for global companies"},
]

print(f"✅ African Knowledge Base: {len(simba_knowledge_base)} entries")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Semantic search index: sentence embeddings over KB questions + FAISS.
# ---------------------------------------------------------------------------
try:
    # Small, fast general-purpose sentence encoder.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # Embed every knowledge-base question once at startup.
    questions = [item["question"] for item in simba_knowledge_base]
    question_embeddings = embedder.encode(questions)

    # Inner-product index over L2-normalized vectors == cosine similarity.
    dimension = question_embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    faiss.normalize_L2(question_embeddings)  # normalized in place
    index.add(question_embeddings)

    print("✅ Smart Search System Ready!")
except Exception as e:
    print(f"❌ Search system error: {e}")
    # Sentinel checked by simba_search(): fall back to fixed slices of the KB.
    index = None
|
|
|
|
|
def simba_search(query, top_k=2):
    """Search the African knowledge base for entries similar to *query*.

    Parameters
    ----------
    query : str
        Free-text user query; embedded and matched against stored questions.
    top_k : int, optional
        Maximum number of matches to return (default 2).

    Returns
    -------
    list[dict]
        Matches with "question", "answer" and "score" (cosine similarity)
        keys, best first. Falls back to the first ``top_k`` raw entries
        (no "score" key) when the index is unavailable or the search fails.
    """
    # Index failed to build at startup — degrade to a fixed slice.
    if index is None:
        return simba_knowledge_base[:top_k]

    try:
        query_embedding = embedder.encode([query])
        faiss.normalize_L2(query_embedding)

        scores, indices = index.search(query_embedding, top_k)

        results = []
        for i, idx in enumerate(indices[0]):
            # FAISS pads missing results with -1; the original `idx < len`
            # check let negative indices silently select entries from the
            # END of the list. Require 0 <= idx as well.
            if 0 <= idx < len(simba_knowledge_base):
                entry = simba_knowledge_base[idx]
                results.append({
                    "question": entry["question"],
                    "answer": entry["answer"],
                    # Cast so callers get a plain float, not np.float32.
                    "score": float(scores[0][i]),
                })
        return results
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any search error degrades to a fixed slice.
        return simba_knowledge_base[:top_k]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def simba_ai_chat(message, history):
    """Generate one Simba AI reply for a chat turn.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior (user, bot) turns from Gradio; currently unused, kept for
        interface compatibility with the UI callbacks.

    Returns
    -------
    str
        The model's reply, or a friendly error string on failure.
    """
    try:
        # Ground the prompt with the two closest knowledge-base entries.
        search_results = simba_search(message, top_k=2)

        context = "📚 African Knowledge Reference:\n"
        for i, result in enumerate(search_results, 1):
            context += f"{i}. {result['question']}: {result['answer']}\n"

        # Mistral-instruct style prompt; the trailing marker doubles as a
        # fallback delimiter for models that echo the prompt.
        prompt = f"""<s>[INST] 🦁 You are SIMBA AI - the First African Large Language Model.

You specialize in African languages, coding, mathematics, and African innovation.

Use this knowledge:
{context}

Question: {message}

Provide an accurate, helpful response that showcases African excellence. [/INST] 🦁 Simba AI:"""

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens. The original decoded the
        # full sequence and split on the marker, which breaks whenever the
        # model re-emits "🦁 Simba AI:" inside its own reply.
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # Defensive: strip any echoed marker that survived token slicing.
        if "🦁 Simba AI:" in response:
            response = response.split("🦁 Simba AI:")[-1].strip()

        return response

    except Exception as e:
        return f"🦁 Simba AI is currently learning... (Error: {str(e)})"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Custom CSS: base font for the whole app plus a gradient "header" banner
# (applied to the gr.HTML block below via class="header").
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #ff7e5f, #feb47b);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: chat pane, input row, examples, and event wiring.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:

    # Gradient banner (styled by the .header rule in `css`).
    gr.HTML("""
    <div class="header">
        <h1>🦁 Simba AI - First African LLM</h1>
        <h3>Specializing in African Languages, Coding & Mathematics</h3>
        <p>Ask about Yoruba, Swahili, Igbo, Hausa, Python programming, math problems, and African innovation!</p>
    </div>
    """)

    # Conversation pane; history is a list of (user, bot) message tuples.
    chatbot = gr.Chatbot(
        label="🦁 Chat with Simba AI",
        height=500,
        show_copy_button=True,
        placeholder="Ask me anything about African languages, coding, or mathematics..."
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your question here...",
            lines=2,
            scale=4
        )
        send_btn = gr.Button("🚀 Ask Simba AI", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("🧹 Clear Chat")

    # One-click example prompts that populate the textbox.
    gr.Examples(
        examples=[
            "Write a Python function to add two numbers",
            "How do you say hello in Yoruba?",
            "What is 15 + 27?",
            "Create a factorial function",
            "Thank you in Swahili",
            "Calculate 8 × 7",
            "What is M-Pesa?"
        ],
        inputs=msg,
        label="💡 Try these examples:"
    )

    def respond(message, chat_history):
        # Generate a reply, append the turn, and return "" as the first
        # output to clear the textbox.
        bot_message = simba_ai_chat(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history

    # Enter key and button both send; clear resets the chat pane to None.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    send_btn.click(respond, [msg, chatbot], [msg, chatbot])
    clear_btn.click(lambda: None, None, chatbot, queue=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Launch the web app when run as a script; debug=True surfaces tracebacks.
if __name__ == "__main__":
    demo.launch(debug=True)