Upload 2 files
Browse files- app (1).py +301 -0
- requirements (1).txt +8 -0
app (1).py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 4 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 5 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
from langchain_community.vectorstores import FAISS
|
| 7 |
+
from langchain.chains import RetrievalQA
|
| 8 |
+
|
| 9 |
+
# --- Configuration ---
# Hugging Face model id passed to AutoTokenizer / AutoModelForCausalLM in setup_rag_chain().
MODEL_NAME = "Jacaranda/UlizaLlama3" # Best Swahili LLM, but may require a paid GPU Space
# Alternative for free CPU Space: "CraneAILabs/swahili-gemma-1b-litert"
# Sentence-embedding model id used to embed the transcript chunks for retrieval.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# NOTE(review): not referenced anywhere in the visible code; the transcript is
# embedded inline in NURSE_TOTO_TRANSCRIPT instead of being read from this file.
TRANSCRIPT_FILE = "nurse_toto_episode_1_transcript.md"
|
| 14 |
+
|
| 15 |
+
# --- Transcript Data (for RAG) ---
|
| 16 |
+
# The full transcript is loaded here. In a real scenario, this would be loaded from a file.
|
| 17 |
+
# For simplicity and deployment, we'll embed the content directly.
|
| 18 |
+
NURSE_TOTO_TRANSCRIPT = """
|
| 19 |
+
# A Nurse Toto - Episode 1: Mzee wa Kutahirii (Kiswahili Transcript)
|
| 20 |
+
|
| 21 |
+
**Series:** A Nurse Toto
|
| 22 |
+
**Episode:** 1 - Mzee wa Kutahirii
|
| 23 |
+
**Creator:** Eddie Butita
|
| 24 |
+
**Language:** Kiswahili (with some English/Sheng)
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
## Scene 1: The Date
|
| 29 |
+
**Characters:** Casypool (Mzee), Girlfriend
|
| 30 |
+
|
| 31 |
+
**Casypool:** My dear, mimi sio kama wale wanaume wengine wenye wanachukua mademu, wanawause, wanawadump. I'm an honorable man. That's why President Ruto ananipenda.
|
| 32 |
+
|
| 33 |
+
**Girlfriend:** Baby, you're lying. Yaani uko na connections kama hizo na uko tu hapa hivi you don't even brag. Aky you're so humble.
|
| 34 |
+
|
| 35 |
+
**Casypool:** Well, humility is my second name. I'm a man of my word. Na nikisema by the way, I will wait until marriage. Nita wait until marriage.
|
| 36 |
+
|
| 37 |
+
**Girlfriend:** Aky baby, you are so sweet and genuine. That's what I love about you. Anyway, umesema unarudi kutoka US lini?
|
| 38 |
+
|
| 39 |
+
**Casypool:** Well, I'll be back in two months time. But don't worry. I'll be sending you money to have fun with your friends. Pesa kidogo tu kama 100,000, 200,000, 300. I hope hiyo itakuwa ni kidogo itakutosha.
|
| 40 |
+
|
| 41 |
+
**Girlfriend:** Hiyo sio kidogo babe. Aky babe you are so sweet and young at heart. Yaani wewe ni mzee na roho ya kijana. Daudi Kabaka na roho ya Butros. I knew it wewe si kama hao baba wengine.
|
| 42 |
+
|
| 43 |
+
**Casypool:** Come on, don't call me mbaba. I'm a promoted youth with so many years experience. So, by the way, unafanya wapi kazi?
|
| 44 |
+
|
| 45 |
+
**Casypool:** I'm right now I'm into real estate. But nafanya kazi kama nurse kwa private hospital ya VIPs pekee yake. Lakini nataka kwenda Canada very soon ndio niende kufanya kazi huko. Nataka uanze kuwa na kiburi. Nataka uanze kutukanatukana boss. Nataka uanze kumea pembe kwa sababu nataka kubadilisha life yako. When I'm back in two months, we settle down. No rushing things. I will wait. Not touching you, no nothing until marriage.
|
| 46 |
+
|
| 47 |
+
**Girlfriend:** Aky babe, you are so sweet. Okay. Let me leave you some cash. Uende upige sherehe. Utoe stress leo because I'm flying out of the country tonight. And I know you'll miss me the way I'll miss you.
|
| 48 |
+
|
| 49 |
+
**Girlfriend:** I will miss you.
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## Intro
|
| 54 |
+
**Narrator:** Anaitwa Wambo_ alipiga C yake smart pale high school kimchezo akagraduate kikomrade with the help of Mkenya na akapata job kama nurse na maguide wenzake. Karibu facility one. Hiyo si manasi ni generation Z. Doc na wadosi ni millennials na wagonjwa ni generation zote.
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Scene 2: Hospital Reception
|
| 59 |
+
**Characters:** Maryanne (Receptionist), Casypool
|
| 60 |
+
|
| 61 |
+
**Maryanne:** Habari yako, kasichana?
|
| 62 |
+
**Casypool:** Ni Maryanne, si kasichana.
|
| 63 |
+
**Maryanne:** Na una kiburi. Na wewe ni receptionist tu. Ungekuwa daktari, si ungekuwa unapepea huko juu na mabawa?
|
| 64 |
+
**Maryanne:** Pole, naweza kusaidia aje?
|
| 65 |
+
**Casypool:** Nataka kutahiri.
|
| 66 |
+
**Maryanne:** Eh?
|
| 67 |
+
**Casypool:** Ni nini hunielewi? Nimesema nataka kutahiri.
|
| 68 |
+
**Maryanne:** Anyway, sikuwa na ubaya. Nataka tu kuchukua details. Utalipa 500 ya registration, and then uende consultation, utasaidika.
|
| 69 |
+
**Casypool:** Nikuulize, kama sina 500 ya registration, inamaanisha hutaniregister?
|
| 70 |
+
**Maryanne:** Eh, for first timers. Ama ushaitahiri hapa before?
|
| 71 |
+
**Casypool:** Msichana una kiburi. How old are you? Hii maneno ya registration, consultation, ni ya nini? Unataka kutengeneza pesa kwa kutahiri kwangu? You want to profit from my private organization?
|
| 72 |
+
**Maryanne:** Eh, mzee, tulia usaidike. Wengine wakitaka kuretire, wewe unakuja hapa kutahiri. Anyway, full details. Name?
|
| 73 |
+
**Casypool:** Kasipul Kapon.
|
| 74 |
+
**Maryanne:** Age?
|
| 75 |
+
**Casypool:** 21.
|
| 76 |
+
**Maryanne:** Mzee, kuwa serious. Kama wewe ni mamangu, basi andika age mine.
|
| 77 |
+
**Maryanne:** Mzee, uko na miaka ngapi?
|
| 78 |
+
**Casypool:** Basi, kama itakufurahisha, 52.
|
| 79 |
+
**Maryanne:** Wacha nisiongee vibaya. Next of kin?
|
| 80 |
+
**Casypool:** Ah, weka Catherine. Ah, ah, no, no, no, no. Not Catherine. Janet. Ah, no. Usiweke Janet. Shiro. No. Ah, weka Martha.
|
| 81 |
+
**Maryanne:** Mzee, unajua unasumbua wewe? Hebu keti hapo. Utalipa 500 ya registration, utaona daktari na 1,000, alafu 15k, hiyo ni ya circumcision.
|
| 82 |
+
**Casypool:** Silipi kitu, niko na insurance.
|
| 83 |
+
**Maryanne:** Ni sawa, uko na insurance. But sasa sijui kama insurance inakava wazee wa umri yako kutahiri. Utangoja hapo usikie kama watakubali.
|
| 84 |
+
**Casypool:** Sasa, kitu ya kutokutahiri, utaenda kutangazia insurance ati sijatahiri?
|
| 85 |
+
**Maryanne:** Mzee, lakini vitu zingine ni za kujisimamia. Hizi ni aibu gani za ati, "Oh, mzee wa 52 years, circumcision na NHIF." Surely. Surely.
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
## Scene 3: Waiting Room
|
| 90 |
+
**Characters:** Wambo, Other Patient
|
| 91 |
+
|
| 92 |
+
**Wambo:** Niko hospitali, nilikuwa nimeshikwa na malaria kidogo. Nakupigia. Wacha nitibiwe, nakupigia.
|
| 93 |
+
**Man:** Eh, ndugu yangu. Naomba unichachawize kidogo ili unieleweshe kimantiki. Hivi mbona imekuchukua muda mrefu kabla hujakuwa ndume? Maanake hapo umekuja kupoteza hela, ndugu yangu. Si uongo. Mimi nina plan nzuri kabisa. Maanake siamini haya mambo ya kisayansi.
|
| 94 |
+
**Casypool:** Wewe, my friend, mind your own business. Wanaume wenzako wako busy kwa ofisi wanapiga ma deal. Wewe uko busy hapa unapiga dasta. Chunga mdomo yako na umind your own business.
|
| 95 |
+
**Man:** Ah, shakum sima tusi. Ila sijakusudia kukukera nyongo. Ni mtazamo tu. Eh? Hata wewe, mabingwa wenzako huku hivyo wanafanya kazi, kule hiv...
|
| 96 |
+
|
| 97 |
+
---
|
| 98 |
+
|
| 99 |
+
## Scene 4: Doctor's Consultation
|
| 100 |
+
**Characters:** Doc Shifta, Casypool, Maryanne
|
| 101 |
+
|
| 102 |
+
**Doctor:** Mzee, please, please. Naomba utoe akili kwa lodging, rudisha akili hospitali. Eh, Maryanne. Kuja ushughulike... Hebu mpeleke huko, get him ready.
|
| 103 |
+
**Maryanne:** Guys, daktari amesema mprepare your patient, anakuja kumsaidia. Ndio huyo.
|
| 104 |
+
**Sly:** Okay, kuna venye yeye si mtoto, ako 52 years.
|
| 105 |
+
**Doctor:** Eh? 52?
|
| 106 |
+
**Sly:** Eh?
|
| 107 |
+
**Doctor:** 52?
|
| 108 |
+
**Maryanne:** I know.
|
| 109 |
+
**Doctor:** Eh. Mwambie basi akuje.
|
| 110 |
+
**Maryanne:** Sawa.
|
| 111 |
+
**Doctor:** Eh.
|
| 112 |
+
**Kasipul:** Habari yako?
|
| 113 |
+
**Doctor:** So, wewe ndio daktari?
|
| 114 |
+
**Doctor:** Hapana, mimi ni KDF. Hapa niko Somalia, napigana na Al-Shabaab.
|
| 115 |
+
**Kasipul:** Na hii kibanda yenu mnaita hospitali, mna kiburi all the way kutoka kwa reception mpaka hapa hivi by the way.
|
| 116 |
+
**Doctor:** Wewe ndio wa kutahiri?
|
| 117 |
+
**Kasipul:** My friend, nina jina naitwa Kasipul. Umesikia?
|
| 118 |
+
**Doctor:** Okay, fanya kazi yangu iwe rahisi. Tusianze kubishana. I want my work to be easier. Unasikia joto ama baridi?
|
| 119 |
+
**Kasipul:** Wapi?
|
| 120 |
+
**Doctor:** Eh, mwili. Mwili mzee.
|
| 121 |
+
**Kasipul:** Nikuulize very simple question. Joto na baridi inaingiliana wapi na kutahiri?
|
| 122 |
+
**Doctor:** Sasa unataka kujitahiri? Si utulie, usikie maswali. Unataka kujitahiri? Unakohoa ama hukohoi?
|
| 123 |
+
**Kasipul:** Ndio nauliza the same question. Kukohoa na kutahiri inaingiliana wapi? Labda hujui, nakuambia kukohoa na kukatwa inaingiliana wapi?
|
| 124 |
+
**Doctor:** Tumbo inakuuma?
|
| 125 |
+
**Kasipul:** Hapana, tumbo hainiumi.
|
| 126 |
+
**Doctor:** Jana ulikula nini?
|
| 127 |
+
**Kasipul:** Githeri special.
|
| 128 |
+
**Doctor:** Ndio maana tumbo inaongea Kikuyu. Sasa utaenda room number four hapo, utaonyeshwa. Uende upimwe typhoid, malaria, TB, pressure, upimwe urine, upimwe stool, na super gonorrhea. Alafu wakutengeneze hapo, utoe nguo, na kuja kushughulikiwa.
|
| 129 |
+
**Kasipul:** Hiyo ya mwisho ndio imenichanganya kidogo. Mbona sikutofahamu? Kutoa nguo, wewe mtoto wa kiume unakuja kunishughulikia. Mbona inaingia hapo?
|
| 130 |
+
**Doctor:** Mzee, please, please. Naomba utoe akili kwa lodging, rudisha akili hospitali. Eh, Maryanne. Kuja ushughulike... Hebu mpeleke huko, get him ready.
|
| 131 |
+
|
| 132 |
+
---
|
| 133 |
+
|
| 134 |
+
## Scene 5: The Nurses' "Trauma"
|
| 135 |
+
**Characters:** Wambo, Sly, Maryanne
|
| 136 |
+
|
| 137 |
+
**Wambo:** Guys, please pray for us. I think we've seen it all.
|
| 138 |
+
**Sly:** We are so scared.
|
| 139 |
+
**Wambo:** Guys, imagine we are about to handle the circumcision of a 52-year-old.
|
| 140 |
+
**Sly:** 52 years old!
|
| 141 |
+
**Wambo:** We are so traumatized.
|
| 142 |
+
**Sly:** Na mimi sijui kuhandle nyoka.
|
| 143 |
+
**Wambo:** Na mimi... I'm even getting an anxiety attack. We are so traumatized. Please pray for us. Please pray for us.
|
| 144 |
+
**Sly:** Lakini msijali, we will upload the full video right after this.
|
| 145 |
+
**Wambo:** 'Cause we will record everything.
|
| 146 |
+
**Sly:** Don't forget to like, share, subscribe, and follow us.
|
| 147 |
+
**Both:** And that's on period!
|
| 148 |
+
**Maryanne:** Guys, daktari amesema mprepare your patient, anakuja kumsaidia. Ndio huyo.
|
| 149 |
+
**Sly:** Babe.
|
| 150 |
+
**Wambo:** America.
|
| 151 |
+
**Sly:** Babe, huyu ndio yule new catch?
|
| 152 |
+
**Wambo:** [Laughs]
|
| 153 |
+
**Sly:** Ndio maana ulikuwa unasema tungoje, sindio?
|
| 154 |
+
"""
|
| 155 |
+
|
| 156 |
+
# --- Model and RAG Setup ---
# Module-level state filled in by setup_rag_chain(); each stays None until it runs.
tokenizer = None  # tokenizer loaded for MODEL_NAME
model = None  # causal language model loaded for MODEL_NAME
rag_chain = None  # callable: takes one question string, returns an answer string
|
| 161 |
+
|
| 162 |
+
def setup_rag_chain():
    """Load the LLM, index the transcript, and publish the question-answering callable.

    Populates the module-level ``tokenizer``, ``model`` and ``rag_chain``
    globals. The call is idempotent: once ``rag_chain`` is set, later calls
    return immediately without reloading anything.

    If the tokenizer or model cannot be loaded, ``rag_chain`` is set to a stub
    that returns a fixed apology string and no retrieval index is built.
    Errors raised while building the embeddings, the FAISS index or the
    generation pipeline are not caught here.
    """
    global tokenizer, model, rag_chain

    if rag_chain is not None:
        return

    # 1. Load the Swahili LLM.
    # MODEL_NAME may be too large for a free CPU Space; any failure here
    # switches the app to the canned-response stub below.
    try:
        # Imported locally so this block brings its own dependency into scope.
        from transformers import BitsAndBytesConfig

        print(f"Loading tokenizer and model: {MODEL_NAME}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load in 4-bit for memory efficiency. The bare `load_in_4bit=True`
        # keyword is deprecated; the quantization settings go through
        # `quantization_config` instead.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model {MODEL_NAME}. Falling back to a dummy model. Error: {e}")

        # Fallback for local testing or if the model is too large for the environment
        def dummy_llm(prompt):
            return "Samahani, mfumo wa lugha haupatikani. Hata hivyo, ninaweza kujibu maswali kuhusu 'Nurse Toto' kulingana na maandishi."

        rag_chain = dummy_llm
        return

    # 2. Split the embedded transcript into overlapping chunks.
    text_splitter = CharacterTextSplitter(
        separator="\n\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.create_documents([NURSE_TOTO_TRANSCRIPT])

    # 3. Embed the chunks and index them for similarity search.
    print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    print("Creating FAISS vector store...")
    db = FAISS.from_documents(texts, embeddings)
    retriever = db.as_retriever(search_kwargs={"k": 3})

    # 4. Build the generation pipeline ONCE. It used to be rebuilt for every
    # question, repeating the same setup work on each chat turn.
    # `return_full_text=False` makes the pipeline return only the newly
    # generated continuation, so the prompt never has to be stripped by
    # searching the output for a marker string.
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,
    )

    def format_prompt(context, question):
        """Combine the system instruction, retrieved context and question into one prompt."""
        system_prompt = (
            "Wewe ni mtaalamu wa mazungumzo ya Kiswahili na Sheng. "
            "Jibu maswali ya mtumiaji kwa kutumia muktadha uliotolewa kutoka kwa "
            "maandishi ya 'A Nurse Toto' Episode 1. Ikiwa jibu halipatikani kwenye "
            "muktadha, jibu kwa heshima kwamba huna habari hiyo, lakini bado "
            "tumia lugha ya Kiswahili au Sheng."
        )
        return f"{system_prompt}\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:"

    def rag_qa(question):
        """Retrieve the most relevant transcript chunks and generate an answer."""
        # `invoke` is the current retriever entry point; `get_relevant_documents`
        # is deprecated.
        docs = retriever.invoke(question)
        context = "\n---\n".join(doc.page_content for doc in docs)
        prompt = format_prompt(context, question)
        # Only the completion comes back, so a question or transcript chunk
        # that itself contains "Answer:" can no longer corrupt the reply.
        return generator(prompt)[0]["generated_text"].strip()

    rag_chain = rag_qa
    print("RAG chain initialized.")
|
| 258 |
+
|
| 259 |
+
# --- Gradio Interface ---
|
| 260 |
+
|
| 261 |
+
def chat_function(message, history):
    """Answer one chat turn for the Gradio chat interface.

    Args:
        message: The user's latest message.
        history: Earlier turns supplied by Gradio. Ignored: every question is
            answered on its own, without conversational context.

    Returns:
        The reply text. A blank message gets a short request to type a
        question; if the chain cannot be initialized, an apology is returned.
    """
    # A missing or whitespace-only message has nothing to retrieve against,
    # so answer immediately instead of running retrieval and generation.
    if message is None or (isinstance(message, str) and not message.strip()):
        return "Tafadhali andika swali lako."

    if rag_chain is None:
        # Attempt to set up the chain on the first message if it failed before
        setup_rag_chain()
        if rag_chain is None:
            return "Samahani, mfumo wa lugha haukuweza kupakiwa. Tafadhali jaribu tena baadaye."

    return rag_chain(message)
|
| 273 |
+
|
| 274 |
+
# Build the model, index and QA callable before any UI is created.
setup_rag_chain()

# Pick the UI: a plain error page when no chain is available, the chat UI otherwise.
if rag_chain is None:
    gr.Interface(
        fn=lambda x: "Model loading failed. Check logs for details.",
        inputs="text",
        outputs="text",
        title="Chatbot Initialization Failed",
    ).launch()
else:
    gr.ChatInterface(
        fn=chat_function,
        title="Nurse Toto Kiswahili/Sheng Chatbot (RAG)",
        description=(
            "Uliza maswali kuhusu maandishi ya 'A Nurse Toto' Episode 1 kwa Kiswahili au Sheng. "
            "Mfumo huu unatumia **Retrieval-Augmented Generation (RAG)** na model ya Kiswahili "
            f"kutoka Hugging Face ({MODEL_NAME}) kujibu maswali yako."
        ),
        # Sample questions offered as one-click prompts in the chat UI.
        examples=[
            ["Casypool ana miaka mingapi?"],
            ["Wambo na Sly walisema nini kuhusu mgonjwa?"],
            ["Mzee alikula nini jana?"],
            ["Nani alikuwa mroho kama magwanda ya mekanika?"],
            ["Mzee alitaka kufanya nini hospitalini?"],
        ],
    ).launch()
|
requirements (1).txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
transformers
|
| 3 |
+
accelerate
|
| 4 |
+
bitsandbytes
|
| 5 |
+
langchain
|
| 6 |
+
sentence-transformers
|
| 7 |
+
faiss-cpu
|
| 8 |
+
gradio
|