# File size: 8,504 Bytes
# 3b2416f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# =====================
# 🦁 SIMBA AI - First African LLM
# =====================

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os

print("🚀 Initializing Simba AI - First African LLM...")

# =====================
# LOAD AI MODEL
# =====================

# Hugging Face model id of the primary model. It is reassigned in the
# except-branch below when the fallback model is loaded instead.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

try:
    # Load the tokenizer and reuse the end-of-sequence token as padding token.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the weights as float16 and leave device placement to
    # `device_map="auto"`.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    print("✅ Simba AI Model Loaded Successfully!")
except Exception as e:
    # Any failure while loading the primary tokenizer or model ends up here.
    print(f"❌ Model loading error: {e}")
    # Fall back to a smaller model, loaded with default dtype/device settings.
    # NOTE(review): this fallback load is not guarded itself, so an error here
    # propagates and aborts start-up. The chat prompt elsewhere in this file
    # uses [INST] markers; whether the fallback model handles that format
    # well should be confirmed.
    model_name = "microsoft/DialoGPT-large"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_name)
    print("✅ Fallback model loaded!")

# =====================
# AFRICAN KNOWLEDGE BASE
# =====================

# Each knowledge-base entry is a dict with exactly two keys, "question" and
# "answer". The entries are written as (question, answer) pairs, grouped by
# topic, and expanded into dicts in the order they are listed.
simba_knowledge_base = [
    {"question": question_text, "answer": answer_text}
    for question_text, answer_text in (
        # CODING
        ("Python add function", "def add(a, b): return a + b"),
        ("Factorial function", "def factorial(n): return 1 if n == 0 else n * factorial(n-1)"),
        ("Reverse string function", "def reverse_string(s): return s[::-1]"),
        ("Check even number", "def is_even(n): return n % 2 == 0"),
        ("Multiply function", "def multiply(x, y): return x * y"),
        ("Yoruba greeting function", "def yoruba_greeting(): return 'Báwo ni'"),

        # MATH
        ("15 + 27", "42"),
        ("8 × 7", "56"),
        ("100 - 45", "55"),
        ("12 × 12", "144"),
        ("25% of 200", "50"),

        # YORUBA
        ("Hello in Yoruba", "Báwo ni"),
        ("Thank you in Yoruba", "Ẹ sé"),
        ("How are you in Yoruba", "Ṣe daadaa ni"),
        ("Good morning in Yoruba", "Ẹ káàrọ̀"),
        ("Good night in Yoruba", "O dàárọ̀"),
        ("Please in Yoruba", "Jọ̀wọ́"),

        # SWAHILI
        ("Hello in Swahili", "Hujambo"),
        ("Thank you in Swahili", "Asante"),

        # IGBO
        ("Hello in Igbo", "Nnọọ"),
        ("Thank you in Igbo", "Daalụ"),

        # HAUSA
        ("Hello in Hausa", "Sannu"),
        ("Thank you in Hausa", "Na gode"),

        # AFRICAN INNOVATION
        ("M-Pesa", "Mobile money service launched in Kenya in 2007"),
        ("Andela", "Trains African software developers for global companies"),
    )
]

# Report how many entries were registered.
print(f"✅ African Knowledge Base: {len(simba_knowledge_base)} entries")

# =====================
# SEARCH SYSTEM
# =====================

# Build the semantic search index over the knowledge-base questions.
# On any failure `index` is set to None, which simba_search treats as
# "search unavailable".
try:
    # Sentence-embedding model; simba_search uses the same object to embed
    # incoming queries.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    
    # Only the "question" texts are embedded. They are added in list order,
    # so row i of the index corresponds to simba_knowledge_base[i].
    questions = [item["question"] for item in simba_knowledge_base]
    question_embeddings = embedder.encode(questions)

    # Inner-product index over L2-normalised vectors, i.e. cosine similarity.
    dimension = question_embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    faiss.normalize_L2(question_embeddings)
    index.add(question_embeddings)

    print("✅ Smart Search System Ready!")
except Exception as e:
    print(f"❌ Search system error: {e}")
    # NOTE: if the SentenceTransformer constructor itself raised, `embedder`
    # is left undefined; only `index` is reset here.
    index = None

def simba_search(query, top_k=2):
    """Return the knowledge-base entries most similar to ``query``.

    The query is embedded with the module-level ``embedder``, L2-normalised
    and looked up in the module-level FAISS ``index``.

    Args:
        query: Free-text user question.
        top_k: Maximum number of entries to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of at most ``top_k`` dicts. Search hits carry the keys
        ``"question"``, ``"answer"`` and ``"score"`` (similarity as a plain
        ``float``). When the index is unavailable or the search fails, the
        first ``top_k`` raw knowledge-base entries are returned instead;
        those have no ``"score"`` key.
    """
    if top_k <= 0:
        return []

    fallback = simba_knowledge_base[:top_k]
    if index is None:
        return fallback

    try:
        query_embedding = embedder.encode([query])
        faiss.normalize_L2(query_embedding)
        scores, indices = index.search(query_embedding, top_k)
    except Exception as e:
        # Best-effort search: report the problem and degrade to the static
        # fallback. Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        print(f"❌ Search error: {e}")
        return fallback

    results = []
    for score, idx in zip(scores[0], indices[0]):
        # FAISS pads missing hits with label -1. Without the lower bound a
        # -1 would index the list from the end and return the wrong entry.
        if 0 <= idx < len(simba_knowledge_base):
            entry = simba_knowledge_base[idx]
            results.append({
                "question": entry["question"],
                "answer": entry["answer"],
                "score": float(score),
            })

    return results

# =====================
# SIMBA AI CHAT FUNCTION
# =====================

def simba_ai_chat(message, history):
    """Generate Simba AI's reply to a single user message.

    Looks up the two most relevant knowledge-base entries, places them in an
    instruction prompt together with the question, samples a completion from
    the module-level ``model`` and returns only the newly generated text.

    Args:
        message: The user's question.
        history: Previous chat turns. Accepted for interface compatibility;
            it is not used when building the prompt.

    Returns:
        The reply text. Errors are not raised; a user-facing message that
        includes the error text is returned instead.
    """
    marker = "🦁 Simba AI:"

    try:
        # Search for relevant knowledge
        search_results = simba_search(message, top_k=2)

        # Build the numbered reference list (one line per entry, with a
        # trailing newline after the last line).
        context_lines = ["📚 African Knowledge Reference:"]
        context_lines.extend(
            f"{i}. {result['question']}: {result['answer']}"
            for i, result in enumerate(search_results, 1)
        )
        context = "\n".join(context_lines) + "\n"

        # No literal "<s>" here: the tokenizer call below adds special tokens
        # itself, so spelling the BOS token out would duplicate it.
        prompt = f"""[INST] 🦁 You are SIMBA AI - the First African Large Language Model.

You specialize in African languages, coding, mathematics, and African innovation.

Use this knowledge:
{context}

Question: {message}

Provide an accurate, helpful response that showcases African excellence. [/INST] {marker}"""

        # NOTE(review): truncation to 1024 tokens cuts the end of the prompt,
        # so a very long message can lose the closing [/INST] marker.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # The generated sequence starts with the prompt tokens. Decode only
        # what comes after them, so the prompt is never echoed back and a
        # marker typed by the user cannot confuse the extraction.
        new_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # If the model repeats the speaker marker, keep the text after it.
        if marker in response:
            response = response.split(marker)[-1].strip()

        return response

    except Exception as e:
        return f"🦁 Simba AI is currently learning... (Error: {str(e)})"

# =====================
# GRADIO INTERFACE
# =====================

# Custom CSS for the African theme, passed to gr.Blocks(css=...) below.
# It sets the app-wide font on `.gradio-container` and styles the `.header`
# banner used by the gr.HTML block (centered white text on an orange
# gradient with rounded corners).
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #ff7e5f, #feb47b);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""

# Create chat interface
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:

    # Banner; the "header" class is styled by the custom CSS string.
    gr.HTML("""
    <div class="header">
        <h1>🦁 Simba AI - First African LLM</h1>
        <h3>Specializing in African Languages, Coding & Mathematics</h3>
        <p>Ask about Yoruba, Swahili, Igbo, Hausa, Python programming, math problems, and African innovation!</p>
    </div>
    """)

    # Conversation display; its value is a list of (user, bot) pairs.
    chatbot = gr.Chatbot(
        label="🦁 Chat with Simba AI",
        height=500,
        show_copy_button=True,
        placeholder="Ask me anything about African languages, coding, or mathematics..."
    )

    # Input row: message box plus send button.
    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your question here...",
            lines=2,
            scale=4
        )
        send_btn = gr.Button("🚀 Ask Simba AI", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("🧹 Clear Chat")

    # Examples; clicking one fills the message box.
    gr.Examples(
        examples=[
            "Write a Python function to add two numbers",
            "How do you say hello in Yoruba?",
            "What is 15 + 27?",
            "Create a factorial function",
            "Thank you in Swahili",
            "Calculate 8 × 7",
            "What is M-Pesa?"
        ],
        inputs=msg,
        label="💡 Try these examples:"
    )

    # Event handlers
    def respond(message, chat_history):
        """Handle one submission from the message box or the send button.

        Args:
            message: Text currently in the message box.
            chat_history: Current chatbot value (list of (user, bot) pairs);
                ``None`` is treated as an empty history.

        Returns:
            A 2-tuple ``("", chat_history)``: the empty string clears the
            message box, the list is the updated chatbot value.
        """
        if chat_history is None:
            chat_history = []

        # Ignore blank submissions instead of running the model on an empty
        # question and adding an empty turn to the conversation.
        if not message or not message.strip():
            return "", chat_history

        bot_message = simba_ai_chat(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history

    # Enter in the textbox and the send button share the same handler.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    send_btn.click(respond, [msg, chatbot], [msg, chatbot])
    # Clearing resets the chatbot value to None.
    clear_btn.click(lambda: None, None, chatbot, queue=False)

# =====================
# LAUNCH
# =====================

# Start the Gradio app only when this file is executed as a script, not when
# it is imported. `debug=True` is passed through to demo.launch.
if __name__ == "__main__":
    demo.launch(debug=True)