Stanley03 committed on
Commit
696e051
·
verified ·
1 Parent(s): 31b0832

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -78
app.py CHANGED
@@ -1,16 +1,14 @@
1
  import gradio as gr
2
  import os
3
- from langchain_text_splitters import CharacterTextSplitter
4
- from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from langchain_community.vectorstores import FAISS
6
- from langchain_community.llms import HuggingFaceHub
7
 
8
  # --- Configuration ---
9
- # Use the Inference API for the LLM to avoid memory issues on free CPU tier
10
  MODEL_NAME = "CraneAILabs/swahili-gemma-1b-litert"
11
- EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
12
 
13
- # --- Transcript Data ---
14
  NURSE_TOTO_TRANSCRIPT = """
15
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
16
  **Series:** A Nurse Toto
@@ -148,42 +146,41 @@ NURSE_TOTO_TRANSCRIPT = """
148
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
149
  """
150
 
151
- # --- Global Variables ---
152
- llm = None
153
- vector_db = None
154
-
155
- def setup_system():
156
- """Initializes the LLM (via Inference API) and the Vector Database for RAG."""
157
- global llm, vector_db
158
-
159
- # 1. Initialize LLM using HuggingFaceHub (Inference API)
160
- # The user MUST set the HF_TOKEN secret in their Hugging Face Space settings.
161
- if "HF_TOKEN" not in os.environ:
162
- raise ValueError("HF_TOKEN environment variable not set. Please set your Hugging Face API token as a secret.")
163
 
164
- print(f"Initializing LLM via HuggingFaceHub Inference API: {MODEL_NAME}...")
165
- llm = HuggingFaceHub(
166
- repo_id=MODEL_NAME,
167
- model_kwargs={"temperature": 0.7, "max_length": 256}
168
- )
169
- print("LLM initialized successfully.")
170
-
171
- # 2. Setup Vector DB for RAG
172
- text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000, chunk_overlap=200)
173
- texts = text_splitter.create_documents([NURSE_TOTO_TRANSCRIPT])
174
 
175
- print("Creating embeddings and vector store...")
176
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
177
- vector_db = FAISS.from_documents(texts, embeddings)
178
- print("System setup complete.")
 
 
 
 
 
179
 
180
  def generate_response(message, history):
181
- """Main chat function supporting both general chat and RAG."""
182
- # 1. Retrieve relevant context from the transcript
183
- docs = vector_db.similarity_search(message, k=2)
184
- context = "\n".join([doc.page_content for doc in docs])
 
185
 
186
- # 2. Construct the prompt
 
 
 
187
  system_prompt = (
188
  "Wewe ni msaidizi wa AI unayezungumza Kiswahili na Sheng. "
189
  "Unaweza kufanya mazungumzo ya kawaida au kujibu maswali kuhusu 'Nurse Toto' "
@@ -193,47 +190,51 @@ def generate_response(message, history):
193
 
194
  full_prompt = f"{system_prompt}\n\nMuktadha wa Nurse Toto:\n{context}\n\nUser: {message}\nAssistant:"
195
 
196
- # 3. Generate response using the LLM (Inference API call)
 
 
 
 
 
 
 
 
 
197
  try:
198
- response = llm.invoke(full_prompt)
199
- # Clean up the response, as the LLM might repeat the prompt
200
- if full_prompt in response:
201
- response = response.split(full_prompt)[-1].strip()
202
- return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
- # Handle API token or rate limit errors
205
- if "API token" in str(e) or "401" in str(e):
206
- return "Samahani, kuna tatizo la uthibitishaji. Tafadhali hakikisha umeweka siri ya **HF_TOKEN** kwa usahihi katika mipangilio ya Space yako."
207
- return f"Samahani, kuna tatizo la kiufundi. Jaribu tena. Kosa: {e}"
208
 
209
  # --- Gradio Interface ---
210
- try:
211
- setup_system()
212
- # Launch Gradio only if setup was successful
213
- gr.ChatInterface(
214
- fn=generate_response,
215
- title="Lightweight Swahili/Sheng Chatbot (Nurse Toto RAG)",
216
- description="Chat na AI kwa Kiswahili au Sheng! Inajua mambo ya Nurse Toto na mambo mengine ya kawaida. **Kumbuka:** Unahitaji kuweka siri ya **HF_TOKEN** katika mipangilio ya Space.",
217
- examples=[
218
- ["Habari yako? Unaweza kunisaidia nini leo?"],
219
- ["Nieleze kuhusu Casypool kwenye Nurse Toto."],
220
- ["Sheng ya 'How are you' ni gani?"],
221
- ["Mzee alitaka kufanya nini hospitalini?"],
222
- ]
223
- ).launch()
224
- except ValueError as e:
225
- # Handle the missing HF_TOKEN error during setup
226
- gr.Interface(
227
- fn=lambda x: f"Kosa la Usanidi: {e}. Tafadhali weka siri ya HF_TOKEN katika mipangilio ya Space yako.",
228
- inputs="text",
229
- outputs="text",
230
- title="Chatbot Initialization Failed - HF_TOKEN Missing"
231
- ).launch()
232
- except Exception as e:
233
- # Handle other setup errors
234
- gr.Interface(
235
- fn=lambda x: f"Kosa la Usanidi: Mfumo haukuweza kuanza. Kosa: {e}",
236
- inputs="text",
237
- outputs="text",
238
- title="Chatbot Initialization Failed"
239
- ).launch()
 
1
  import gradio as gr
2
  import os
3
+ import requests
4
+ import json
 
 
5
 
6
  # --- Configuration ---
7
+ # Model to use via the Inference API
8
  MODEL_NAME = "CraneAILabs/swahili-gemma-1b-litert"
9
+ API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
10
 
11
+ # --- Transcript Data (for RAG) ---
12
  NURSE_TOTO_TRANSCRIPT = """
13
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
14
  **Series:** A Nurse Toto
 
146
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
147
  """
148
 
149
+ # --- RAG Logic (Simplified) ---
150
+ # This is a highly simplified RAG implementation for demonstration purposes.
151
+ # In a real-world scenario, you would use a proper vector database.
152
+ def simple_rag_lookup(query):
153
+ """Performs a simple keyword-based lookup in the transcript."""
154
+ # Split transcript into "chunks" (lines or paragraphs)
155
+ chunks = NURSE_TOTO_TRANSCRIPT.split('\n\n')
 
 
 
 
 
156
 
157
+ # Simple keyword matching (case-insensitive)
158
+ relevant_chunks = [
159
+ chunk for chunk in chunks
160
+ if any(word.lower() in chunk.lower() for word in query.split())
161
+ ]
 
 
 
 
 
162
 
163
+ # Return a maximum of 3 relevant chunks
164
+ return "\n\n".join(relevant_chunks[:3])
165
+
166
+ # --- LLM API Call ---
167
def query_api(payload):
    """Send one request to the Hugging Face Inference API.

    Args:
        payload: JSON-serializable request body (``inputs`` plus optional
            ``parameters``) as expected by the text-generation endpoint.

    Returns:
        The decoded JSON response: a list of generation dicts on success,
        or a dict with an "error" key on API-side failure — the caller
        (generate_response) inspects both shapes.

    Raises:
        requests.exceptions.RequestException: on network failure or
            timeout; handled by the caller's except clause.
    """
    headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
    # requests has NO default timeout: without one, a stalled API call
    # would hang this Gradio worker indefinitely. Timeout errors are
    # RequestException subclasses, already caught by the caller.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
172
 
173
  def generate_response(message, history):
174
+ """Main chat function using the Inference API."""
175
+
176
+ # 1. Check for API Token
177
+ if "HF_TOKEN" not in os.environ or not os.environ.get("HF_TOKEN"):
178
+ return "Kosa la Usanidi: Tafadhali weka siri ya **HF_TOKEN** katika mipangilio ya Space yako. Huwezi kutumia API bila token."
179
 
180
+ # 2. Retrieve context (using simplified RAG)
181
+ context = simple_rag_lookup(message)
182
+
183
+ # 3. Construct the prompt
184
  system_prompt = (
185
  "Wewe ni msaidizi wa AI unayezungumza Kiswahili na Sheng. "
186
  "Unaweza kufanya mazungumzo ya kawaida au kujibu maswali kuhusu 'Nurse Toto' "
 
190
 
191
  full_prompt = f"{system_prompt}\n\nMuktadha wa Nurse Toto:\n{context}\n\nUser: {message}\nAssistant:"
192
 
193
+ # 4. Generate response using the API
194
+ payload = {
195
+ "inputs": full_prompt,
196
+ "parameters": {
197
+ "max_new_tokens": 256,
198
+ "temperature": 0.7,
199
+ "return_full_text": False
200
+ }
201
+ }
202
+
203
  try:
204
+ api_response = query_api(payload)
205
+
206
+ if isinstance(api_response, list) and api_response:
207
+ response_text = api_response[0].get("generated_text", "").strip()
208
+
209
+ # Clean up the response, as the LLM might repeat the prompt
210
+ if "Assistant:" in response_text:
211
+ response_text = response_text.split("Assistant:")[-1].strip()
212
+
213
+ return response_text
214
+
215
+ elif isinstance(api_response, dict) and "error" in api_response:
216
+ # Handle API errors (e.g., model loading, rate limit)
217
+ error_msg = api_response["error"]
218
+ if "Authorization" in error_msg or "Invalid token" in error_msg:
219
+ return "Kosa la Uthibitishaji: Tafadhali hakikisha siri ya **HF_TOKEN** ni sahihi na ina ruhusa ya 'read'."
220
+ return f"Kosa la API: {error_msg}. Jaribu tena."
221
+
222
+ return "Samahani, sikupata jibu kutoka kwa mfumo wa lugha."
223
+
224
+ except requests.exceptions.RequestException as e:
225
+ return f"Kosa la Mtandao: Imeshindwa kuunganisha na API. Kosa: {e}"
226
  except Exception as e:
227
+ return f"Kosa lisilotarajiwa: {e}"
 
 
 
228
 
229
  # --- Gradio Interface ---
230
# Build the chat UI once at import time and expose it as module-level
# `demo` (the name the Hugging Face Spaces Gradio SDK discovers on
# import); only launch the server when the file is run as a script,
# per the standard Gradio `if __name__ == "__main__"` pattern.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Zero-Memory Swahili/Sheng Chatbot (API-Powered)",
    description="Chat na AI kwa Kiswahili au Sheng! Inajua mambo ya Nurse Toto na mambo mengine ya kawaida. **Kumbuka:** Unahitaji kuweka siri ya **HF_TOKEN** katika mipangilio ya Space.",
    examples=[
        ["Habari yako? Unaweza kunisaidia nini leo?"],
        ["Nieleze kuhusu Casypool kwenye Nurse Toto."],
        ["Sheng ya 'How are you' ni gani?"],
        ["Mzee alitaka kufanya nini hospitalini?"],
    ],
)

if __name__ == "__main__":
    demo.launch()