BissakaAI committed on
Commit 1141e32 · verified · 1 parent: 4bb2275

Delete awari-project (1).ipynb

Files changed (1)
  1. awari-project (1).ipynb +0 -391
awari-project (1).ipynb DELETED
@@ -1,391 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-12-11T15:43:45.235265Z",
- "iopub.status.busy": "2025-12-11T15:43:45.235029Z",
- "iopub.status.idle": "2025-12-11T15:43:45.340285Z",
- "shell.execute_reply": "2025-12-11T15:43:45.339518Z",
- "shell.execute_reply.started": "2025-12-11T15:43:45.235247Z"
- },
- "trusted": true
- },
- "outputs": [],
- "source": [
- "from kaggle_secrets import UserSecretsClient\n",
- "user_secrets = UserSecretsClient()\n",
- "secret_value_0 = user_secrets.get_secret(\"HF_TOKEN\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-12-11T15:43:45.341357Z",
- "iopub.status.busy": "2025-12-11T15:43:45.341102Z",
- "iopub.status.idle": "2025-12-11T15:45:04.811675Z",
- "shell.execute_reply": "2025-12-11T15:45:04.810916Z",
- "shell.execute_reply.started": "2025-12-11T15:43:45.341333Z"
- },
- "trusted": true
- },
- "outputs": [],
- "source": [
- "pip install -U bitsandbytes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from transformers import (\n",
- "    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,\n",
- "    AutoProcessor, SeamlessM4Tv2ForSpeechToText,\n",
- "    VitsModel  # TTS\n",
- ")\n",
- "import torch\n",
- "import soundfile as sf\n",
- "import os\n",
- "from kaggle_secrets import UserSecretsClient\n",
- "\n",
- "\n",
- "# get the HF token from Kaggle secrets\n",
- "user_secrets = UserSecretsClient()\n",
- "HF_TOKEN = user_secrets.get_secret(\"HF_TOKEN\")\n",
- "print(\"HF token retrieved\")\n",
- "\n",
- "\n",
- "# use bitsandbytes to quantize the model to 8-bit\n",
- "bnb_config = BitsAndBytesConfig(load_in_8bit=True)\n",
- "\n",
- "# set the device to run on\n",
- "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
- "\n",
- "# load the N-ATLaS model and tokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(\n",
- "    \"NCAIR1/N-ATLaS\",\n",
- "    trust_remote_code=True,\n",
- "    token=HF_TOKEN\n",
- ")\n",
- "\n",
- "\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- "    \"NCAIR1/N-ATLaS\",\n",
- "    quantization_config=bnb_config,\n",
- "    device_map=\"auto\",\n",
- "    trust_remote_code=True,\n",
- "    token=HF_TOKEN\n",
- ")\n",
- "\n",
- "\n",
- "\n",
- "# ASR model to convert speech to text\n",
- "ASR_MODEL = \"facebook/seamless-m4t-v2-large\"\n",
- "processor = AutoProcessor.from_pretrained(ASR_MODEL, token=HF_TOKEN)\n",
- "asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(ASR_MODEL, token=HF_TOKEN).to(device)\n",
- "asr_model.eval()\n",
- "\n",
- "\n",
- "# TTS models to convert text back to speech;\n",
- "# currently only Yoruba is loaded (Igbo and Hausa are commented out)\n",
- "tts_models = {}\n",
- "for lang, tts_name in {\n",
- "    \"yoruba\": \"facebook/mms-tts-yor\",\n",
- "    # \"igbo\": \"facebook/mms-tts-ibo\",\n",
- "    # \"hausa\": \"facebook/mms-tts-hau\",\n",
- "}.items():\n",
- "    print(f\"Loading TTS model for {lang}\")\n",
- " tts_proc = AutoProcessor.from_pretrained(tts_name, token=HF_TOKEN)\n",
- "    tts_mod = VitsModel.from_pretrained(tts_name, token=HF_TOKEN).to(device)\n",
- "    tts_mod.eval()\n",
- "    tts_models[lang] = {\"processor\": tts_proc, \"model\": tts_mod}\n",
- "\n",
- "print(\"All TTS models loaded successfully!\")\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-12-11T15:45:04.813528Z",
- "iopub.status.busy": "2025-12-11T15:45:04.813289Z",
- "iopub.status.idle": "2025-12-11T15:49:56.343820Z",
- "shell.execute_reply": "2025-12-11T15:49:56.343087Z",
- "shell.execute_reply.started": "2025-12-11T15:45:04.813503Z"
- },
- "trusted": true
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "import soundfile as sf\n",
- "\n",
- "\n",
- "\n",
- "# function to handle text-only input\n",
- "def textonly(user_msg: str):\n",
- "    def format_prompt(messages):\n",
- "        return tokenizer.apply_chat_template(\n",
- "            messages,\n",
- "            add_generation_prompt=True,\n",
- "            tokenize=False\n",
- "        )\n",
- "\n",
- "    chat = [\n",
- "        {\"role\": \"system\", \"content\": \"You are a helpful model trained by Awarri AI Technologies.\"},\n",
- "        {\"role\": \"user\", \"content\": user_msg}\n",
- "    ]\n",
- "\n",
- "    final_text = format_prompt(chat)\n",
- "    inputs = tokenizer(final_text, return_tensors=\"pt\").to(model.device)\n",
- "\n",
- "    with torch.no_grad():\n",
- "        output_ids = model.generate(\n",
- "            **inputs,\n",
- "            max_new_tokens=200,\n",
- "            temperature=0.1,  # has no effect unless do_sample=True\n",
- "            repetition_penalty=1.12\n",
- "        )\n",
- "\n",
- " response = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n",
- "    return response\n",
- "\n",
- "\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# function to handle speech input: ASR -> LLM -> TTS\n",
- "def speechonly(speech, output_wav_path=\"response.wav\"):\n",
- "    # speech-to-text (ASR)\n",
- "    inputs = processor(audios=speech, sampling_rate=16000, return_tensors=\"pt\").to(device)\n",
- "    with torch.no_grad():\n",
- "        predicted_ids = asr_model.generate(inputs[\"input_features\"], max_new_tokens=300)\n",
- "    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]\n",
- "\n",
- "    print(\"\\nTRANSCRIPTION:\", transcription)\n",
- "\n",
- "\n",
- "    # use the N-ATLaS LLM to generate the response\n",
- "    def format_prompt(messages):\n",
- "        return tokenizer.apply_chat_template(\n",
- "            messages,\n",
- "            add_generation_prompt=True,\n",
- "            tokenize=False\n",
- "        )\n",
- "\n",
- "    chat = [\n",
- "        {\"role\": \"system\", \"content\": \"Respond ONLY in the detected Nigerian language (Yoruba, Igbo, Hausa, Pidgin, English).\"},\n",
- "        {\"role\": \"user\", \"content\": transcription}\n",
- "    ]\n",
- "\n",
- "    final_text = format_prompt(chat)\n",
- "    inputs_llm = tokenizer(final_text, return_tensors=\"pt\").to(model.device)\n",
- "\n",
- "    with torch.no_grad():\n",
- "        output_ids = model.generate(\n",
- "            **inputs_llm,\n",
- "            max_new_tokens=200,\n",
- "            temperature=0.1,  # has no effect unless do_sample=True\n",
- "            repetition_penalty=1.12\n",
- "        )\n",
- "\n",
- " llm_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n",
- "    print(\"\\nllm response:\", llm_response)\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "    # N-ATLaS is a multilingual model designed for Nigerian languages,\n",
- "    # so it is expected to understand them well;\n",
- "    # use it to detect the language of the reply\n",
- "    lang_prompt = [\n",
- "        {\"role\": \"system\", \"content\": \"You are a Nigerian language expert.\"},\n",
- "        {\"role\": \"user\", \"content\": f\"In which Nigerian language is this text: '{llm_response}'? Reply with only one of these: Yoruba, Igbo, Hausa, Pidgin, English.\"}\n",
- "    ]\n",
- "    lang_text = format_prompt(lang_prompt)\n",
- "    lang_inputs = tokenizer(lang_text, return_tensors=\"pt\").to(model.device)\n",
- "\n",
- "    with torch.no_grad():\n",
- "        lang_output_ids = model.generate(**lang_inputs, max_new_tokens=10)\n",
- "\n",
- " llm_language = tokenizer.decode(lang_output_ids[0], skip_special_tokens=True).strip().lower()\n",
- "    print(\"\\nLLM DETECTED LANGUAGE:\", llm_language)\n",
- "\n",
- "    # pick the TTS model based on the detected language\n",
- "\n",
- "    if llm_language not in tts_models:\n",
- " llm_language = \"english\" \n",
- "\n",
- "    tts_processor = tts_models[llm_language][\"processor\"]\n",
- "    tts_model = tts_models[llm_language][\"model\"]\n",
- "\n",
- "\n",
- "    # generate speech from the response text\n",
- "\n",
- "    # process the text\n",
- "    tts_inputs = tts_processor(text=llm_response, return_tensors=\"pt\").to(device)\n",
- "    with torch.no_grad():\n",
- "        output = tts_model(**tts_inputs)\n",
- "    audio_array = output.waveform.squeeze().cpu().numpy()\n",
- "\n",
- "    # save as WAV\n",
- "    sf.write(output_wav_path, audio_array, 16000)\n",
- "    return llm_response, output_wav_path"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
273
- "\n",
- "# Ask user for input type\n",
- "userinput = input(\"Enter 'text' or 'audio': \").lower()\n",
- "\n",
- "if userinput == \"text\":\n",
- "    # prompt for a message and call the text function\n",
- "    answer1 = textonly(input(\"Enter your message: \"))\n",
- "    print(\"\\ntext response:\\n\", answer1)\n",
- "\n",
- "else:\n",
- "    # Load and preprocess audio\n",
- "    audio_path = \"/kaggle/input/recordings/Recording (3).m4a\"\n",
- "    audio = AudioSegment.from_file(audio_path)\n",
- "    audio = audio.set_frame_rate(16000).set_channels(1)\n",
- "    audio.export(\"/kaggle/working/audio.wav\", format=\"wav\")\n",
- "\n",
- "    speech, sr = librosa.load(\"/kaggle/working/audio.wav\", sr=16000)\n",
- "    print(\"Converted audio loaded.\")\n",
- "\n",
- "    # Call speech function\n",
- "    answer2 = speechonly(speech)\n",
- "    print(\"\\nAUDIO RESPONSE saved as:\", answer2)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from fastapi import FastAPI\n",
- "from pydantic import BaseModel\n",
- "from pydub import AudioSegment\n",
- "import librosa\n",
- "import uvicorn\n",
- "\n",
- "app = FastAPI(title='Simple FastAPI App', version='1.0.0')\n",
- "\n",
- "@app.get(\"/\")\n",
- "def root():\n",
- "    return {\"Message\": \"Welcome to the Healthatlas Application\"}\n",
- "\n",
- "\n",
- "\n",
- "class TextRequest(BaseModel):\n",
- "    text: str\n",
- "\n",
- "\n",
- "class SpeechRequest(BaseModel):\n",
- "    input_audio_path: str\n",
- "    wav_output_path: str\n",
- "\n",
- "\n",
- "\n",
- "@app.post(\"/textonly\")\n",
- "def do_text(request: TextRequest):\n",
- "    answer1 = textonly(request.text)\n",
- "    print(\"\\nText response:\\n\", answer1)\n",
- "    return {\"response\": answer1}\n",
- "\n",
- "\n",
- "@app.post(\"/speechonly\")\n",
- "def run_speech(request: SpeechRequest):\n",
- "    audio = AudioSegment.from_file(request.input_audio_path)\n",
- "    audio = audio.set_frame_rate(16000).set_channels(1)\n",
- "    audio.export(request.wav_output_path, format=\"wav\")\n",
- "\n",
- "    speech, sr = librosa.load(request.wav_output_path, sr=16000)\n",
- "    print(\"Converted audio loaded.\")\n",
- "\n",
- "\n",
- "    answer2 = speechonly(speech)\n",
- "\n",
- "    return {\"response\": answer2, \"saved_wav\": request.wav_output_path}\n",
- "\n",
- "if __name__ == '__main__':\n",
- "    print(os.getenv('host'))\n",
- "    print(os.getenv('port'))\n",
- " uvicorn.run(app,host=os.getenv(\"host\"),port=int(os.getenv(\"port\")))"
- ]
- }
- ],
- "metadata": {
- "kaggle": {
- "accelerator": "nvidiaTeslaT4",
- "dataSources": [
- {
- "datasetId": 8987240,
- "sourceId": 14109383,
- "sourceType": "datasetVersion"
- }
- ],
- "dockerImageVersionId": 31193,
- "isGpuEnabled": true,
- "isInternetEnabled": true,
- "language": "python",
- "sourceType": "notebook"
- },
- "kernelspec": {
- "display_name": "zoomcamp-pwCLAhn6",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }