Stanley03 committed on
Commit
4666f34
·
verified ·
1 Parent(s): 89509ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -126
app.py CHANGED
@@ -1,23 +1,18 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
  from langchain_text_splitters import CharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
- # from langchain.chains import RetrievalQA # Not used in this RAG implementation
8
 
9
  # --- Configuration ---
10
- MODEL_NAME = "Jacaranda/UlizaLlama3" # Best Swahili LLM, but may require a paid GPU Space
11
- # Alternative for free CPU Space: "CraneAILabs/swahili-gemma-1b-litert"
12
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
13
- TRANSCRIPT_FILE = "nurse_toto_episode_1_transcript.md"
14
 
15
- # --- Transcript Data (for RAG) ---
16
- # The full transcript is loaded here. In a real scenario, this would be loaded from a file.
17
- # For simplicity and deployment, we'll embed the content directly.
18
  NURSE_TOTO_TRANSCRIPT = """
19
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
20
-
21
  **Series:** A Nurse Toto
22
  **Episode:** 1 - Mzee wa Kutahirii
23
  **Creator:** Eddie Butita
@@ -81,7 +76,7 @@ NURSE_TOTO_TRANSCRIPT = """
81
  **Maryanne:** Mzee, unajua unasumbua wewe? Hebu keti hapo. Utalipa 500 ya registration, utaona daktari na 1,000, alafu 15k, hiyo ni ya circumcision.
82
  **Casypool:** Silipi kitu, niko na insurance.
83
  **Maryanne:** Ni sawa, uko na insurance. But sasa sijui kama insurance inakava wazee wa umri yako kutahiri. Utangoja hapo usikie kama watakubali.
84
- **Casypool:** Sasa, kitu ya kutokutahiri, utaenda kutangazia insurance ati sijatahiri?
85
  **Maryanne:** Mzee, lakini vitu zingine ni za kujisimamia. Hizi ni aibu gani za ati, "Oh, mzee wa 52 years, circumcision na NHIF." Surely. Surely.
86
 
87
  ---
@@ -153,148 +148,100 @@ NURSE_TOTO_TRANSCRIPT = """
153
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
154
  """
155
 
156
- # --- Model and RAG Setup ---
157
- # Global variables to hold the model and RAG chain
158
  tokenizer = None
159
  model = None
160
- rag_chain = None
161
-
162
- def setup_rag_chain():
163
- """Initializes the LLM, tokenizer, and RAG chain."""
164
- global tokenizer, model, rag_chain
165
 
166
- if rag_chain is not None:
167
- return
 
168
 
169
- # 1. Load the Swahili LLM (using a smaller model for deployment)
170
- # Note: For a free Hugging Face Space, a small model is necessary.
171
- # The UlizaLlama3 is 8B and will likely require a paid GPU.
172
- # We will use a placeholder for the code, but advise the user.
173
  try:
174
  print(f"Loading tokenizer and model: {MODEL_NAME}...")
175
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
176
- # Load in 4-bit for memory efficiency
 
 
177
  model = AutoModelForCausalLM.from_pretrained(
178
  MODEL_NAME,
179
- load_in_4bit=True,
180
- torch_dtype=torch.bfloat16,
181
- device_map="auto"
182
  )
 
183
  print("Model loaded successfully.")
 
 
 
 
 
 
 
 
 
 
184
  except Exception as e:
185
- print(f"Error loading model {MODEL_NAME}. Falling back to a dummy model. Error: {e}")
186
- # Fallback for local testing or if the model is too large for the environment
187
- def dummy_llm(prompt):
188
- return "Samahani, mfumo wa lugha haupatikani. Hata hivyo, ninaweza kujibu maswali kuhusu 'Nurse Toto' kulingana na maandishi."
189
- rag_chain = dummy_llm
190
- return
191
-
192
- # 2. Create documents from the transcript
193
- text_splitter = CharacterTextSplitter(
194
- separator="\n\n",
195
- chunk_size=1000,
196
- chunk_overlap=200,
197
- length_function=len,
 
 
 
 
 
 
 
 
198
  )
199
- texts = text_splitter.create_documents([NURSE_TOTO_TRANSCRIPT])
200
-
201
- # 3. Create embeddings and vector store
202
- print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
203
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
204
- print("Creating FAISS vector store...")
205
- db = FAISS.from_documents(texts, embeddings)
206
- retriever = db.as_retriever(search_kwargs={"k": 3})
207
-
208
- # 4. Setup the RAG chain
209
- # We'll use a simple pipeline for generation and integrate it with the retriever manually
210
- # to avoid complex LangChain dependencies that might fail on a free Space.
211
-
212
- # A simple function to format the prompt for the LLM
213
- def format_prompt(context, question):
214
- # This is a general instruction prompt for the LLM
215
- system_prompt = (
216
- "Wewe ni mtaalamu wa mazungumzo ya Kiswahili na Sheng. "
217
- "Jibu maswali ya mtumiaji kwa kutumia muktadha uliotolewa kutoka kwa "
218
- "maandishi ya 'A Nurse Toto' Episode 1. Ikiwa jibu halipatikani kwenye "
219
- "muktadha, jibu kwa heshima kwamba huna habari hiyo, lakini bado "
220
- "tumia lugha ya Kiswahili au Sheng."
221
- )
222
- return f"{system_prompt}\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:"
223
-
224
- # A simple function to run the RAG process
225
- def rag_qa(question):
226
- # 1. Retrieve context
227
- docs = retriever.get_relevant_documents(question)
228
- context = "\n---\n".join([doc.page_content for doc in docs])
229
-
230
- # 2. Format prompt
231
- prompt = format_prompt(context, question)
232
-
233
- # 3. Generate response
234
- # Using the Hugging Face pipeline for text generation
235
- pipe = pipeline(
236
- "text-generation",
237
- model=model,
238
- tokenizer=tokenizer,
239
  max_new_tokens=256,
240
  do_sample=True,
241
  temperature=0.7,
242
- top_p=0.9,
243
  )
244
-
245
- # The model will generate the prompt and the answer, so we need to clean the output
246
- output = pipe(prompt)[0]['generated_text']
247
-
248
- # Simple cleaning to extract only the answer part
249
- if "Answer:" in output:
250
- answer = output.split("Answer:", 1)[-1].strip()
251
- else:
252
- answer = output.split(prompt, 1)[-1].strip() # Fallback
253
-
254
- return answer
255
-
256
- rag_chain = rag_qa
257
- print("RAG chain initialized.")
258
-
259
- # --- Gradio Interface ---
260
-
261
- def chat_function(message, history):
262
- """The main function for the Gradio chat interface."""
263
- if rag_chain is None:
264
- # Attempt to set up the chain on the first message if it failed before
265
- setup_rag_chain()
266
- if rag_chain is None:
267
- return "Samahani, mfumo wa lugha haukuweza kupakiwa. Tafadhali jaribu tena baadaye."
268
-
269
- # The history is not used for RAG, as it's a simple QA chain.
270
- # For a conversational model, history would be included in the prompt.
271
- response = rag_chain(message)
272
  return response
273
 
274
- # Initialize the RAG chain on startup
275
- setup_rag_chain()
276
-
277
- # Define the Gradio interface
278
- if rag_chain is not None:
279
  gr.ChatInterface(
280
- fn=chat_function,
281
- title="Nurse Toto Kiswahili/Sheng Chatbot (RAG)",
282
- description=(
283
- "Uliza maswali kuhusu maandishi ya 'A Nurse Toto' Episode 1 kwa Kiswahili au Sheng. "
284
- "Mfumo huu unatumia **Retrieval-Augmented Generation (RAG)** na model ya Kiswahili "
285
- f"kutoka Hugging Face ({MODEL_NAME}) kujibu maswali yako."
286
- ),
287
  examples=[
288
- ["Casypool ana miaka mingapi?"],
289
- ["Wambo na Sly walisema nini kuhusu mgonjwa?"],
290
- ["Mzee alikula nini jana?"],
291
- ["Nani alikuwa mroho kama magwanda ya mekanika?"],
292
  ["Mzee alitaka kufanya nini hospitalini?"],
293
  ]
294
  ).launch()
295
  else:
 
296
  gr.Interface(
297
- fn=lambda x: "Model loading failed. Check logs for details.",
298
  inputs="text",
299
  outputs="text",
300
  title="Chatbot Initialization Failed"
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from langchain_text_splitters import CharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import FAISS
 
7
 
8
  # --- Configuration ---
9
+ # Switching to the smallest available Swahili model (1B) for guaranteed free CPU hosting
10
+ MODEL_NAME = "CraneAILabs/swahili-gemma-1b-litert"
11
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
12
 
13
+ # --- Transcript Data ---
 
 
14
  NURSE_TOTO_TRANSCRIPT = """
15
  # A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
 
16
  **Series:** A Nurse Toto
17
  **Episode:** 1 - Mzee wa Kutahirii
18
  **Creator:** Eddie Butita
 
76
  **Maryanne:** Mzee, unajua unasumbua wewe? Hebu keti hapo. Utalipa 500 ya registration, utaona daktari na 1,000, alafu 15k, hiyo ni ya circumcision.
77
  **Casypool:** Silipi kitu, niko na insurance.
78
  **Maryanne:** Ni sawa, uko na insurance. But sasa sijui kama insurance inakava wazee wa umri yako kutahiri. Utangoja hapo usikie kama watakubali.
79
+ **Casypool:** Sasa, kitu ya kutokutahiri, utaenda kutangazia insurance ati sijatahiri?
80
  **Maryanne:** Mzee, lakini vitu zingine ni za kujisimamia. Hizi ni aibu gani za ati, "Oh, mzee wa 52 years, circumcision na NHIF." Surely. Surely.
81
 
82
  ---
 
148
  **Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
149
  """
150
 
151
+ # --- Global Variables ---
 
152
  tokenizer = None
153
  model = None
154
+ vector_db = None
 
 
 
 
155
 
156
+ def setup_system():
157
+ """Initializes the LLM and the Vector Database for RAG."""
158
+ global tokenizer, model, vector_db
159
 
 
 
 
 
160
  try:
161
  print(f"Loading tokenizer and model: {MODEL_NAME}...")
162
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
163
+
164
+ # Load model explicitly on CPU with a memory-safe dtype
165
+ # We are using the smallest available model (1B) to maximize chances of success on the free tier.
166
  model = AutoModelForCausalLM.from_pretrained(
167
  MODEL_NAME,
168
+ torch_dtype=torch.float32, # Safer for CPU-only environments
169
+ device_map="cpu" # Explicitly set to CPU to avoid auto-detection issues
 
170
  )
171
+ model.eval()
172
  print("Model loaded successfully.")
173
+
174
+ # Setup Vector DB for RAG
175
+ text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000, chunk_overlap=200)
176
+ texts = text_splitter.create_documents([NURSE_TOTO_TRANSCRIPT])
177
+
178
+ print("Creating embeddings and vector store...")
179
+ embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
180
+ vector_db = FAISS.from_documents(texts, embeddings)
181
+ print("System setup complete.")
182
+ return True
183
  except Exception as e:
184
+ print(f"FATAL ERROR: Model loading failed. This is likely due to memory constraints. Error: {e}")
185
+ # If model loading fails, we cannot proceed with the chatbot.
186
+ return False
187
+
188
+ def generate_response(message, history):
189
+ """Main chat function supporting both general chat and RAG."""
190
+ # Check if the model is loaded. If not, return the error message.
191
+ if model is None:
192
+ return "Samahani, mfumo wa lugha haukuweza kupakiwa kwa sababu ya matatizo ya kumbukumbu (memory issues). Tafadhali jaribu tena baadaye au tumia mfumo mdogo zaidi."
193
+
194
+ # 1. Retrieve relevant context from the transcript
195
+ docs = vector_db.similarity_search(message, k=2)
196
+ context = "\n".join([doc.page_content for doc in docs])
197
+
198
+ # 2. Construct the prompt
199
+ # We provide the context but instruct the model it can also chat generally.
200
+ system_prompt = (
201
+ "Wewe ni msaidizi wa AI unayezungumza Kiswahili na Sheng. "
202
+ "Unaweza kufanya mazungumzo ya kawaida au kujibu maswali kuhusu 'Nurse Toto' "
203
+ "kwa kutumia muktadha uliotolewa hapa chini. "
204
+ "Ikiwa swali halihusiani na Nurse Toto, jibu kwa kutumia maarifa yako ya jumla."
205
  )
206
+
207
+ full_prompt = f"{system_prompt}\n\nMuktadha wa Nurse Toto:\n{context}\n\nUser: {message}\nAssistant:"
208
+
209
+ # 3. Generate
210
+ inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
211
+
212
+ with torch.no_grad():
213
+ outputs = model.generate(
214
+ **inputs,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  max_new_tokens=256,
216
  do_sample=True,
217
  temperature=0.7,
218
+ top_p=0.9
219
  )
220
+
221
+ full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
222
+
223
+ # Extract only the assistant's response
224
+ response = full_output.split("Assistant:")[-1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  return response
226
 
227
+ # Initialize the system. If it fails, the model will be None and the chat function will return an error.
228
+ if setup_system():
229
+ # Launch Gradio only if setup was successful
 
 
230
  gr.ChatInterface(
231
+ fn=generate_response,
232
+ title="Lightweight Swahili/Sheng Chatbot (Nurse Toto RAG)",
233
+ description="Chat na AI kwa Kiswahili au Sheng! Inajua mambo ya Nurse Toto na mambo mengine ya kawaida.",
 
 
 
 
234
  examples=[
235
+ ["Habari yako? Unaweza kunisaidia nini leo?"],
236
+ ["Nieleze kuhusu Casypool kwenye Nurse Toto."],
237
+ ["Sheng ya 'How are you' ni gani?"],
 
238
  ["Mzee alitaka kufanya nini hospitalini?"],
239
  ]
240
  ).launch()
241
  else:
242
+ # If setup fails, launch a simple interface with an error message
243
  gr.Interface(
244
+ fn=lambda x: "Samahani, mfumo wa lugha haukuweza kupakiwa kwa sababu ya matatizo ya kumbukumbu (memory issues). Tafadhali jaribu tena baadaye au tumia mfumo mdogo zaidi.",
245
  inputs="text",
246
  outputs="text",
247
  title="Chatbot Initialization Failed"