Stanley03 committed on
Commit
696e051
·
verified ·
1 Parent(s): 31b0832

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -78
app.py CHANGED
@@ -1,16 +1,14 @@
1
  import gradio as gr
2
  import os
3
- from langchain_text_splitters import CharacterTextSplitter
4
- from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from langchain_community.vectorstores import FAISS
6
- from langchain_community.llms import HuggingFaceHub
7
 
8
  # --- Configuration ---
9
- # Use the Inference API for the LLM to avoid memory issues on free CPU tier
10
  MODEL_NAME = "CraneAILabs/swahili-gemma-1b-litert"
11
- EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
12
 
13
- # --- Transcript Data ---
14
  NURSE_TOTO_TRANSCRIPT = """
15
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
16
  **Series:** A Nurse Toto
@@ -148,42 +146,41 @@ NURSE_TOTO_TRANSCRIPT = """
148
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
149
  """
150
 
151
- # --- Global Variables ---
152
- llm = None
153
- vector_db = None
154
-
155
- def setup_system():
156
- """Initializes the LLM (via Inference API) and the Vector Database for RAG."""
157
- global llm, vector_db
158
-
159
- # 1. Initialize LLM using HuggingFaceHub (Inference API)
160
- # The user MUST set the HF_TOKEN secret in their Hugging Face Space settings.
161
- if "HF_TOKEN" not in os.environ:
162
- raise ValueError("HF_TOKEN environment variable not set. Please set your Hugging Face API token as a secret.")
163
 
164
- print(f"Initializing LLM via HuggingFaceHub Inference API: {MODEL_NAME}...")
165
- llm = HuggingFaceHub(
166
- repo_id=MODEL_NAME,
167
- model_kwargs={"temperature": 0.7, "max_length": 256}
168
- )
169
- print("LLM initialized successfully.")
170
-
171
- # 2. Setup Vector DB for RAG
172
- text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000, chunk_overlap=200)
173
- texts = text_splitter.create_documents([NURSE_TOTO_TRANSCRIPT])
174
 
175
- print("Creating embeddings and vector store...")
176
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
177
- vector_db = FAISS.from_documents(texts, embeddings)
178
- print("System setup complete.")
 
 
 
 
 
179
 
180
  def generate_response(message, history):
181
- """Main chat function supporting both general chat and RAG."""
182
- # 1. Retrieve relevant context from the transcript
183
- docs = vector_db.similarity_search(message, k=2)
184
- context = "\n".join([doc.page_content for doc in docs])
 
185
 
186
- # 2. Construct the prompt
 
 
 
187
  system_prompt = (
188
  "Wewe ni msaidizi wa AI unayezungumza Kiswahili na Sheng. "
189
  "Unaweza kufanya mazungumzo ya kawaida au kujibu maswali kuhusu 'Nurse Toto' "
@@ -193,47 +190,51 @@ def generate_response(message, history):
193
 
194
  full_prompt = f"{system_prompt}\n\nMuktadha wa Nurse Toto:\n{context}\n\nUser: {message}\nAssistant:"
195
 
196
- # 3. Generate response using the LLM (Inference API call)
 
 
 
 
 
 
 
 
 
197
  try:
198
- response = llm.invoke(full_prompt)
199
- # Clean up the response, as the LLM might repeat the prompt
200
- if full_prompt in response:
201
- response = response.split(full_prompt)[-1].strip()
202
- return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
- # Handle API token or rate limit errors
205
- if "API token" in str(e) or "401" in str(e):
206
- return "Samahani, kuna tatizo la uthibitishaji. Tafadhali hakikisha umeweka siri ya **HF_TOKEN** kwa usahihi katika mipangilio ya Space yako."
207
- return f"Samahani, kuna tatizo la kiufundi. Jaribu tena. Kosa: {e}"
208
 
209
  # --- Gradio Interface ---
210
- try:
211
- setup_system()
212
- # Launch Gradio only if setup was successful
213
- gr.ChatInterface(
214
- fn=generate_response,
215
- title="Lightweight Swahili/Sheng Chatbot (Nurse Toto RAG)",
216
- description="Chat na AI kwa Kiswahili au Sheng! Inajua mambo ya Nurse Toto na mambo mengine ya kawaida. **Kumbuka:** Unahitaji kuweka siri ya **HF_TOKEN** katika mipangilio ya Space.",
217
- examples=[
218
- ["Habari yako? Unaweza kunisaidia nini leo?"],
219
- ["Nieleze kuhusu Casypool kwenye Nurse Toto."],
220
- ["Sheng ya 'How are you' ni gani?"],
221
- ["Mzee alitaka kufanya nini hospitalini?"],
222
- ]
223
- ).launch()
224
- except ValueError as e:
225
- # Handle the missing HF_TOKEN error during setup
226
- gr.Interface(
227
- fn=lambda x: f"Kosa la Usanidi: {e}. Tafadhali weka siri ya HF_TOKEN katika mipangilio ya Space yako.",
228
- inputs="text",
229
- outputs="text",
230
- title="Chatbot Initialization Failed - HF_TOKEN Missing"
231
- ).launch()
232
- except Exception as e:
233
- # Handle other setup errors
234
- gr.Interface(
235
- fn=lambda x: f"Kosa la Usanidi: Mfumo haukuweza kuanza. Kosa: {e}",
236
- inputs="text",
237
- outputs="text",
238
- title="Chatbot Initialization Failed"
239
- ).launch()
 
1
  import gradio as gr
2
  import os
3
+ import requests
4
+ import json
 
 
5
 
6
  # --- Configuration ---
7
+ # Model to use via the Inference API
8
  MODEL_NAME = "CraneAILabs/swahili-gemma-1b-litert"
9
+ API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
10
 
11
+ # --- Transcript Data (for RAG) ---
12
  NURSE_TOTO_TRANSCRIPT = """
13
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
14
  **Series:** A Nurse Toto
 
146
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
147
  """
148
 
149
+ # --- RAG Logic (Simplified) ---
150
+ # This is a highly simplified RAG implementation for demonstration purposes.
151
+ # In a real-world scenario, you would use a proper vector database.
152
+ def simple_rag_lookup(query):
153
+ """Performs a simple keyword-based lookup in the transcript."""
154
+ # Split transcript into "chunks" (lines or paragraphs)
155
+ chunks = NURSE_TOTO_TRANSCRIPT.split('\n\n')
 
 
 
 
 
156
 
157
+ # Simple keyword matching (case-insensitive)
158
+ relevant_chunks = [
159
+ chunk for chunk in chunks
160
+ if any(word.lower() in chunk.lower() for word in query.split())
161
+ ]
 
 
 
 
 
162
 
163
+ # Return a maximum of 3 relevant chunks
164
+ return "\n\n".join(relevant_chunks[:3])
165
+
166
+ # --- LLM API Call ---
167
def query_api(payload):
    """Send one request to the Hugging Face Inference API.

    Args:
        payload: JSON-serializable request body (``inputs`` plus optional
            ``parameters``) as expected by the text-generation endpoint.

    Returns:
        The decoded JSON response: a list of generation dicts on success,
        or a dict with an "error" key on API-side failure — the caller
        (generate_response) inspects both shapes.

    Raises:
        requests.exceptions.RequestException: on network failure or
            timeout; handled by the caller's except clause.
    """
    headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
    # requests has NO default timeout: without one, a stalled API call
    # would hang this Gradio worker indefinitely. Timeout errors are
    # RequestException subclasses, already caught by the caller.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
172
 
173
  def generate_response(message, history):
174
+ """Main chat function using the Inference API."""
175
+
176
+ # 1. Check for API Token
177
+ if "HF_TOKEN" not in os.environ or not os.environ.get("HF_TOKEN"):
178
+ return "Kosa la Usanidi: Tafadhali weka siri ya **HF_TOKEN** katika mipangilio ya Space yako. Huwezi kutumia API bila token."
179
 
180
+ # 2. Retrieve context (using simplified RAG)
181
+ context = simple_rag_lookup(message)
182
+
183
+ # 3. Construct the prompt
184
  system_prompt = (
185
  "Wewe ni msaidizi wa AI unayezungumza Kiswahili na Sheng. "
186
  "Unaweza kufanya mazungumzo ya kawaida au kujibu maswali kuhusu 'Nurse Toto' "
 
190
 
191
  full_prompt = f"{system_prompt}\n\nMuktadha wa Nurse Toto:\n{context}\n\nUser: {message}\nAssistant:"
192
 
193
+ # 4. Generate response using the API
194
+ payload = {
195
+ "inputs": full_prompt,
196
+ "parameters": {
197
+ "max_new_tokens": 256,
198
+ "temperature": 0.7,
199
+ "return_full_text": False
200
+ }
201
+ }
202
+
203
  try:
204
+ api_response = query_api(payload)
205
+
206
+ if isinstance(api_response, list) and api_response:
207
+ response_text = api_response[0].get("generated_text", "").strip()
208
+
209
+ # Clean up the response, as the LLM might repeat the prompt
210
+ if "Assistant:" in response_text:
211
+ response_text = response_text.split("Assistant:")[-1].strip()
212
+
213
+ return response_text
214
+
215
+ elif isinstance(api_response, dict) and "error" in api_response:
216
+ # Handle API errors (e.g., model loading, rate limit)
217
+ error_msg = api_response["error"]
218
+ if "Authorization" in error_msg or "Invalid token" in error_msg:
219
+ return "Kosa la Uthibitishaji: Tafadhali hakikisha siri ya **HF_TOKEN** ni sahihi na ina ruhusa ya 'read'."
220
+ return f"Kosa la API: {error_msg}. Jaribu tena."
221
+
222
+ return "Samahani, sikupata jibu kutoka kwa mfumo wa lugha."
223
+
224
+ except requests.exceptions.RequestException as e:
225
+ return f"Kosa la Mtandao: Imeshindwa kuunganisha na API. Kosa: {e}"
226
  except Exception as e:
227
+ return f"Kosa lisilotarajiwa: {e}"
 
 
 
228
 
229
  # --- Gradio Interface ---
230
# Build the chat UI once at import time and expose it as module-level
# `demo` (the name the Hugging Face Spaces Gradio SDK discovers on
# import); only launch the server when the file is run as a script,
# per the standard Gradio `if __name__ == "__main__"` pattern.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Zero-Memory Swahili/Sheng Chatbot (API-Powered)",
    description="Chat na AI kwa Kiswahili au Sheng! Inajua mambo ya Nurse Toto na mambo mengine ya kawaida. **Kumbuka:** Unahitaji kuweka siri ya **HF_TOKEN** katika mipangilio ya Space.",
    examples=[
        ["Habari yako? Unaweza kunisaidia nini leo?"],
        ["Nieleze kuhusu Casypool kwenye Nurse Toto."],
        ["Sheng ya 'How are you' ni gani?"],
        ["Mzee alitaka kufanya nini hospitalini?"],
    ],
)

if __name__ == "__main__":
    demo.launch()