mozzicato commited on
Commit
1e5a24e
·
verified ·
1 Parent(s): 3dca110
Files changed (1) hide show
  1. voc6.py +0 -794
voc6.py CHANGED
@@ -7,10 +7,6 @@ Original file is located at
7
  https://colab.research.google.com/drive/17WecCovbP3TgYvHDyZ4Yckj77r2q5Nam
8
  """
9
 
10
- !pip install langchain langchain-google-genai langchain-core sentence-transformers faiss-cpu numpy gradio
11
- !pip install langchain-google-genai
12
- # Cell 1: Install packages
13
- !pip install spitch gradio pydub python-dotenv
14
 
15
  # Cell to add FIRST - Your Original WemaRAGSystem
16
  import json
@@ -1546,793 +1542,3 @@ iface = create_voice_gradio_interface(
1546
  )
1547
 
1548
  iface.launch(share=True, debug=True)
1549
-
1550
- # ============================================================================
1551
- # Wema Bank Voice-Enabled RAG Chatbot with Spitch Integration - CORRECTED
1552
- # ============================================================================
1553
-
1554
- import tempfile
1555
- import os
1556
- import atexit
1557
- import glob
1558
- import io
1559
- from typing import Optional
1560
- from spitch import Spitch
1561
- import gradio as gr
1562
- from google.colab import userdata
1563
-
1564
-
1565
- # ============================================================================
1566
- # STEP 1: Initialize Spitch Client
1567
- # ============================================================================
1568
-
1569
class SpitchVoiceHandler:
    """
    Handles all voice-related operations using Spitch API.
    Supports multilingual speech-to-text and text-to-speech.

    All methods are deliberately non-raising: failures are logged and a
    fallback value is returned (error string, original text, or None) so
    the surrounding Gradio handlers never crash on an API hiccup.
    """

    def __init__(self, api_key: str):
        """
        Initialize Spitch client.

        Args:
            api_key: Your Spitch API key
        """
        self.client = Spitch(api_key=api_key)

    def transcribe_audio(
        self,
        audio_file,
        source_language: str = "en",
        model: str = "mansa_v1"
    ) -> str:
        """
        Transcribe audio to text using Spitch.
        Supports multiple African and international languages.

        Args:
            audio_file: Audio file path (str) or file-like object
            source_language: Language code (e.g., 'en', 'yo', 'ig', 'ha')
            model: Spitch model to use (default: mansa_v1)

        Returns:
            Transcribed text, or an apology string starting with "Sorry,"
            on failure (callers check for "Sorry"/"Error" substrings).
        """
        try:
            print(f"🎤 Transcribing audio file: {audio_file}")

            # If audio_file is a path, open it ourselves; otherwise assume
            # it is already a file-like object (e.g. handed over by Gradio).
            if isinstance(audio_file, str):
                with open(audio_file, 'rb') as f:
                    response = self.client.speech.transcribe(
                        content=f,
                        language=source_language,
                        model=model
                    )
            else:
                # Assume it's already a file-like object (from Gradio)
                response = self.client.speech.transcribe(
                    content=audio_file,
                    language=source_language,
                    model=model
                )

            print(f"Response type: {type(response)}")

            # The Spitch response shape is not pinned down here, so probe
            # in order of preference: .text() method, .text attribute,
            # .json() payload, then plain str() as a last resort.
            # NOTE(review): order matters — a callable .text must be
            # checked before the plain-attribute case.
            if hasattr(response, 'text') and callable(response.text):
                # It's a method, not an attribute
                transcription_text = response.text()
            elif hasattr(response, 'text'):
                # It's an attribute
                transcription_text = response.text
            elif hasattr(response, 'json'):
                # Try to parse JSON response
                json_data = response.json()
                transcription_text = json_data.get('text', str(json_data))
            else:
                # Try to convert response to string
                transcription_text = str(response)

            print(f"✅ Transcription: {transcription_text}")
            return transcription_text

        except Exception as e:
            # Log full traceback but keep the UI alive by returning a
            # human-readable error string instead of raising.
            print(f"❌ Transcription error: {e}")
            import traceback
            traceback.print_exc()
            return f"Sorry, I couldn't understand the audio. Error: {str(e)}"

    def translate_to_english(self, text: str, source_lang: str = "auto") -> str:
        """
        Translate text to English using Spitch translation API.

        Args:
            text: Text to translate
            source_lang: Source language code or 'auto' for auto-detection

        Returns:
            Translated text in English; the original `text` is returned
            unchanged when it is already English or when translation fails.
        """
        try:
            # If already in English, return as is
            if source_lang == "en":
                return text

            print(f"🌍 Translating from {source_lang} to English...")
            print(f"📝 Original text: {text}")

            translation = self.client.text.translate(
                text=text,
                source=source_lang,
                target="en"
            )

            english_text = translation.text
            print(f"✅ Translated to English: {english_text}")

            return english_text

        except Exception as e:
            error_msg = f"Translation failed: {str(e)}"
            print(f"❌ {error_msg}")
            import traceback
            traceback.print_exc()
            # Return original if translation fails
            return text

    def synthesize_speech(
        self,
        text: str,
        target_language: str = "en",
        voice: str = "lina"
    ) -> bytes:
        """
        Convert text to speech using Spitch TTS.

        Args:
            text: Text to convert to speech
            target_language: Target language for speech
            voice: Voice to use (e.g., 'lina', 'ada', 'kofi')

        Returns:
            Audio bytes, or None when the response cannot be read or the
            API call fails (callers must handle None).
        """
        try:
            # Call Spitch TTS API
            response = self.client.speech.generate(
                text=text,
                language=target_language,
                voice=voice
            )

            # Spitch returns a BinaryAPIResponse here; .read() yields the
            # raw audio bytes — presumably MP3, since downstream code saves
            # it with a .mp3 suffix (TODO confirm against Spitch docs).
            if hasattr(response, 'read'):
                audio_bytes = response.read()
                print(f"✅ TTS generated {len(audio_bytes)} bytes of audio")
                return audio_bytes
            else:
                print(f"❌ Response type: {type(response)}")
                print(f"❌ Response attributes: {dir(response)}")
                return None

        except Exception as e:
            print(f"❌ TTS error: {e}")
            import traceback
            traceback.print_exc()
            return None
1725
-
1726
-
1727
- # ============================================================================
1728
- # STEP 2: Integrate Voice with Your LangChain RAG System
1729
- # ============================================================================
1730
-
1731
class WemaVoiceAssistant:
    """
    Complete voice-enabled assistant combining Spitch voice I/O
    with your existing Wema RAG system.
    """

    def __init__(
        self,
        rag_system,
        chain,
        spitch_api_key: str
    ):
        """
        Initialize the voice assistant.

        Args:
            rag_system: Your initialized WemaRAGSystem
            chain: Your LangChain RAG chain (already created)
            spitch_api_key: Spitch API key
        """
        # rag_system is stored for callers' convenience; only `chain`
        # is consulted by process_voice_query itself.
        self.rag_system = rag_system
        self.voice_handler = SpitchVoiceHandler(spitch_api_key)
        self.chain = chain

    def process_voice_query(
        self,
        audio_input,
        input_language: str = "en",
        output_language: str = "en",
        voice: str = "lina"
    ):
        """
        Complete voice interaction pipeline:
        1. Speech to text (any language)
        2. Translate to English if needed
        3. Query RAG system
        4. Generate response
        5. Translate response if needed
        6. Text to speech

        Args:
            audio_input: Audio file from user
            input_language: User's spoken language
            output_language: Desired response language
            voice: TTS voice to use

        Returns:
            tuple: (response_text, response_audio) where response_audio is
            raw audio bytes or None; on any failure the tuple is
            (error_message, None) — this method never raises.
        """
        try:
            # Step 1: Transcribe audio to text
            print(f"Transcribing audio in {input_language}...")
            transcribed_text = self.voice_handler.transcribe_audio(
                audio_input,
                source_language=input_language
            )
            print(f"Transcribed: {transcribed_text}")

            # Step 2: Translate to English if not already
            if input_language != "en":
                print("Translating to English...")
                english_query = self.voice_handler.translate_to_english(
                    transcribed_text,
                    source_lang=input_language
                )
            else:
                english_query = transcribed_text

            print(f"English query: {english_query}")

            # Step 3: Get response from RAG system (in English)
            # NOTE(review): assumes chain.invoke returns a plain string —
            # response_text[:100] would fail on a dict payload; confirm
            # against the chain construction elsewhere in this file.
            print("Querying RAG system...")
            response_text = self.chain.invoke({"query": english_query})
            print(f"RAG response: {response_text[:100]}...")

            # Step 4: Translate response if needed (calls the Spitch
            # client directly rather than translate_to_english, since the
            # direction here is en -> output_language)
            if output_language != "en":
                print(f"Translating response to {output_language}...")
                translation = self.voice_handler.client.text.translate(
                    text=response_text,
                    source="en",
                    target=output_language
                )
                final_text = translation.text
            else:
                final_text = response_text

            # Step 5: Generate speech
            print("Generating speech...")
            audio_response = self.voice_handler.synthesize_speech(
                final_text,
                target_language=output_language,
                voice=voice
            )

            return final_text, audio_response

        except Exception as e:
            error_msg = f"An error occurred: {str(e)}"
            print(error_msg)
            return error_msg, None
1832
-
1833
-
1834
- # ============================================================================
1835
- # STEP 3: Helper Functions for Audio File Management
1836
- # ============================================================================
1837
-
1838
def save_audio_to_temp_file(audio_bytes):
    """Persist raw audio bytes to a temporary .mp3 file.

    Returns the path of the newly created file, or None when no bytes
    are provided. The file is created with delete=False so it outlives
    this call (Gradio serves it later); cleanup_temp_audio_files()
    reclaims such files on interpreter exit.
    """
    if audio_bytes is None:
        return None

    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
        handle.write(audio_bytes)
        saved_path = handle.name

    return saved_path
1848
-
1849
-
1850
def cleanup_temp_audio_files():
    """Delete leftover temporary .mp3 files created by save_audio_to_temp_file.

    Scans the system temp directory for files matching ``tmp*.mp3``
    ("tmp" is the NamedTemporaryFile default prefix, ".mp3" the suffix
    we add) and removes each one. Cleanup is best-effort: files that are
    locked, in use, or already gone are skipped silently.
    """
    temp_dir = tempfile.gettempdir()
    for temp_file in glob.glob(os.path.join(temp_dir, "tmp*.mp3")):
        try:
            os.remove(temp_file)
        except OSError:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only filesystem errors are
            # expected and safe to ignore here.
            pass


# Register cleanup function to run on exit
atexit.register(cleanup_temp_audio_files)
1862
-
1863
-
1864
- # ============================================================================
1865
- # STEP 4: Create Gradio Interface (With Text AND Voice Options)
1866
- # ============================================================================
1867
-
1868
def create_voice_gradio_interface(
    rag_system,
    chain,
    spitch_api_key: str
):
    """
    Create a Gradio interface with BOTH text and voice input/output capabilities.

    Args:
        rag_system: Your initialized WemaRAGSystem
        chain: Your LangChain RAG chain (already created)
        spitch_api_key: Spitch API key

    Returns:
        Gradio Blocks interface (call .launch() on it)
    """

    # Initialize voice assistant
    assistant = WemaVoiceAssistant(rag_system, chain, spitch_api_key)

    # Voice-language mapping taken from the Spitch documentation — only
    # voices listed for a language may be used with that language's TTS.
    LANGUAGE_CONFIG = {
        "English": {
            "code": "en",
            "voices": ["john", "lucy", "lina", "jude", "henry", "kani", "kingsley",
                       "favour", "comfort", "daniel", "remi"]
        },
        "Yoruba": {
            "code": "yo",
            "voices": ["sade", "funmi", "segun", "femi"]
        },
        "Igbo": {
            "code": "ig",
            "voices": ["obinna", "ngozi", "amara", "ebuka"]
        },
        "Hausa": {
            "code": "ha",
            "voices": ["hasan", "amina", "zainab", "aliyu"]
        }
    }

    # Extract just language names for dropdowns
    ALL_LANGUAGES = list(LANGUAGE_CONFIG.keys())

    # NOTE: the old module kept a separate, unused VOICES list here; it
    # was dead code superseded by LANGUAGE_CONFIG and has been removed.

    def handle_text_query(text_input):
        """Handle text-only queries; returns (response, None) to match the
        (text, audio) output signature shared with the voice handler."""
        if not text_input or text_input.strip() == "":
            return "Please enter a question.", None

        try:
            response = chain.invoke({"query": text_input})
            return response, None
        except Exception as e:
            return f"Error: {str(e)}", None

    def update_voices(language):
        """Update voice dropdown based on selected output language."""
        voices = LANGUAGE_CONFIG.get(language, {}).get("voices", ["lina"])
        return gr.Dropdown(choices=voices, value=voices[0])

    def handle_voice_interaction(audio, input_lang, output_lang, voice):
        """Gradio handler for the full voice pipeline:
        transcribe -> translate to English -> RAG -> translate back -> TTS.

        Returns (text_response, audio_file_path_or_None).
        """
        print("="*60)
        print("VOICE INTERACTION STARTED")
        print(f"Audio input: {audio}")
        print(f"Input language: {input_lang}")
        print(f"Output language: {output_lang}")
        print(f"Voice: {voice}")
        print("="*60)

        if audio is None:
            return "Please record or upload audio.", None

        # Get language codes and voices (fall back to English for any
        # unrecognized dropdown value)
        input_config = LANGUAGE_CONFIG.get(input_lang, LANGUAGE_CONFIG["English"])
        output_config = LANGUAGE_CONFIG.get(output_lang, LANGUAGE_CONFIG["English"])

        input_code = input_config["code"]
        output_code = output_config["code"]

        # Validate voice for output language — Spitch voices are
        # language-specific, so silently substitute the first valid one.
        available_voices = output_config["voices"]
        if voice not in available_voices:
            voice = available_voices[0]
            print(f"⚠️ Voice changed to {voice} for {output_lang}")

        try:
            # Process voice query
            print("\n🎤 Processing voice query...")

            # Step 1: Transcribe
            transcribed_text = assistant.voice_handler.transcribe_audio(
                audio,
                source_language=input_code
            )
            print(f"📝 Transcribed ({input_lang}): {transcribed_text}")

            # Check if transcription failed — transcribe_audio signals
            # failure via an error string rather than an exception.
            if "Error" in transcribed_text or "Sorry" in transcribed_text:
                return transcribed_text, None

            # Step 2: Translate to English if needed (RAG corpus is English)
            if input_code != "en":
                print("🌍 Translating to English...")
                english_query = assistant.voice_handler.translate_to_english(
                    transcribed_text,
                    source_lang=input_code
                )
                print(f"🇬🇧 English query: {english_query}")
            else:
                english_query = transcribed_text

            # Step 3: Get RAG response (ALWAYS in English first)
            print("🔍 Querying RAG system...")
            try:
                response_text = assistant.chain.invoke({"query": english_query})
                print(f"✅ RAG response (English): {response_text[:200]}...")
            except Exception as e:
                error_msg = f"Error getting response: {str(e)}"
                print(f"❌ RAG Error: {error_msg}")
                return error_msg, None

            # Step 4: Decide what to do with translation.
            # tts_* variables drive speech generation; translation_note is
            # appended to the displayed text when we had to deviate.
            if output_code != "en":
                print(f"🌍 Translating response from English to {output_lang}...")

                # Long technical responses translate poorly, so keep them
                # in English instead of risking a garbled translation.
                if len(response_text) > 500:
                    print(f"⚠️ Response is long ({len(response_text)} chars), keeping English for accuracy")
                    final_text = response_text
                    tts_text = response_text
                    tts_language = "en"
                    tts_voice = "lina"
                    # FIX: the old note wrongly claimed a full translation
                    # was shown above — no translation is produced on this
                    # path, so say what actually happened.
                    translation_note = f"\n\n⚠️ (Response kept in English for accuracy; {output_lang} translation of long answers is skipped.)"
                else:
                    try:
                        translation = assistant.voice_handler.client.text.translate(
                            text=response_text,
                            source="en",
                            target=output_code
                        )
                        translated_text = translation.text
                        print(f"✅ Translated to {output_lang}: {translated_text[:200]}...")

                        final_text = translated_text
                        tts_text = translated_text
                        tts_language = output_code
                        tts_voice = voice
                        translation_note = ""

                    except Exception as e:
                        print(f"⚠️ Translation failed: {e}, using English")
                        final_text = response_text
                        tts_text = response_text
                        tts_language = "en"
                        tts_voice = "lina"
                        translation_note = f"\n\n⚠️ (Translation to {output_lang} failed, showing English response)"
            else:
                final_text = response_text
                tts_text = response_text
                tts_language = "en"
                tts_voice = voice
                translation_note = ""

            # Step 5: Generate speech
            print(f"🔊 Generating speech in {tts_language} with voice {tts_voice}...")
            print(f"🔊 TTS Text preview: {tts_text[:100]}...")

            audio_bytes = assistant.voice_handler.synthesize_speech(
                tts_text,
                target_language=tts_language,
                voice=tts_voice
            )

            print(f"🔊 Audio bytes type: {type(audio_bytes)}")
            print(f"🔊 Audio bytes length: {len(audio_bytes) if audio_bytes else 0}")

            # Convert audio bytes to a file path, since the output
            # gr.Audio component is declared with type="filepath".
            audio_file_path = None
            if audio_bytes:
                print("\n💾 Saving audio to temp file...")
                audio_file_path = save_audio_to_temp_file(audio_bytes)
                print(f"✅ Audio saved to: {audio_file_path}")

                # Verify file exists and has content
                if audio_file_path and os.path.exists(audio_file_path):
                    file_size = os.path.getsize(audio_file_path)
                    print(f"✅ File size: {file_size} bytes")
                else:
                    print("❌ File was not created properly!")
            else:
                print("❌ No audio bytes received from TTS")

            # Add translation note if needed
            final_text = final_text + translation_note

            print("="*60)
            return final_text, audio_file_path

        except Exception as e:
            error_msg = f"Error processing voice: {str(e)}"
            print(f"\n❌ ERROR: {error_msg}")
            import traceback
            traceback.print_exc()
            print("="*60)
            return error_msg, None

    # Create Gradio interface with BOTH text and voice
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🏦 Wema Bank AI Assistant
        ### Powered by Spitch AI & LangChain RAG

        Choose how you want to interact: Type or Speak!
        """)

        with gr.Tabs():
            # TEXT TAB
            with gr.Tab("💬 Text Chat"):
                gr.Markdown("### Type your banking questions")

                text_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything about Wema Bank products and services...",
                    lines=3
                )

                text_submit_btn = gr.Button("📤 Send", variant="primary", size="lg")

                text_output = gr.Textbox(
                    label="Response",
                    lines=10,
                    interactive=False
                )

                # Examples for text
                gr.Examples(
                    examples=[
                        ["What is ALAT?"],
                        ["How do I open a savings account?"],
                        ["Tell me about Wema Kiddies Account"],
                        ["How can I avoid phishing scams?"],
                        ["What loans does Wema Bank offer?"]
                    ],
                    inputs=text_input,
                    label="💡 Try these questions"
                )

                # handle_text_query returns (text, None); the hidden Audio
                # component absorbs the unused second output.
                text_submit_btn.click(
                    fn=handle_text_query,
                    inputs=text_input,
                    outputs=[text_output, gr.Audio(visible=False)]
                )

                # Also submit on Enter
                text_input.submit(
                    fn=handle_text_query,
                    inputs=text_input,
                    outputs=[text_output, gr.Audio(visible=False)]
                )

            # VOICE TAB
            with gr.Tab("🎤 Voice Chat"):
                gr.Markdown("""
                ### Speak your banking questions in your language!

                **✅ Fully Supported Nigerian Languages:**
                - 🇬🇧 **English** - 11 voices available
                - 🇳🇬 **Yoruba** - 4 voices (Sade, Funmi, Segun, Femi)
                - 🇳🇬 **Igbo** - 4 voices (Obinna, Ngozi, Amara, Ebuka)
                - 🇳🇬 **Hausa** - 4 voices (Hasan, Amina, Zainab, Aliyu)

                **💡 Translation Tips:**
                - Simple questions translate best (e.g., "What is ALAT?", "How do I save money?")
                - Long technical responses may be kept in English for accuracy
                - You can always ask in your language and get text in both languages!
                """)

                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(
                            sources=["microphone", "upload"],
                            type="filepath",
                            label="🎙️ Record or Upload Audio"
                        )

                        input_language = gr.Dropdown(
                            choices=ALL_LANGUAGES,
                            value="English",
                            label="Your Language (Speech Input)"
                        )

                    with gr.Column():
                        output_language = gr.Dropdown(
                            choices=ALL_LANGUAGES,
                            value="English",
                            label="Response Language (Audio Output)"
                        )

                        voice_selection = gr.Dropdown(
                            choices=LANGUAGE_CONFIG["English"]["voices"],
                            value="lina",
                            label="Voice"
                        )

                # Update voices when output language changes
                output_language.change(
                    fn=update_voices,
                    inputs=output_language,
                    outputs=voice_selection
                )

                voice_submit_btn = gr.Button("🚀 Ask Wema Assist", variant="primary", size="lg")

                voice_text_output = gr.Textbox(
                    label="📝 Text Response",
                    lines=8,
                    interactive=False
                )

                voice_audio_output = gr.Audio(
                    label="🔊 Audio Response",
                    type="filepath"  # must be filepath: handler returns a temp-file path
                )

                voice_submit_btn.click(
                    fn=handle_voice_interaction,
                    inputs=[audio_input, input_language, output_language, voice_selection],
                    outputs=[voice_text_output, voice_audio_output]
                )

        gr.Markdown("""
        ---
        ### 📌 Features
        - **Text Chat**: Fast and simple - just type and get instant responses
        - **Voice Chat**: Full support for Nigerian languages!

        ### 🇳🇬 Supported Nigerian Languages
        ✅ **English** - 11 different voices (male & female)
        ✅ **Yoruba** - E ku ọjọ! (4 authentic Yoruba voices)
        ✅ **Igbo** - Nnọọ! (4 authentic Igbo voices)
        ✅ **Hausa** - Sannu! (4 authentic Hausa voices)

        💡 **All features work in every language:**
        - 🎤 Speak your question in your language
        - 📝 Get text response translated
        - 🔊 Hear authentic audio response in your language
        - 🔄 Seamless translation between languages
        """)

    return demo
2224
-
2225
-
2226
- # ============================================================================
2227
- # ALTERNATIVE: Simpler Hybrid Interface
2228
- # ============================================================================
2229
-
2230
def create_hybrid_interface(
    rag_system,
    chain,
    spitch_api_key: str
):
    """
    Creates a simpler interface supporting both text and voice input.

    Lighter-weight alternative to create_voice_gradio_interface: fixed
    voice list, no per-language voice validation, and delegates the whole
    voice pipeline to WemaVoiceAssistant.process_voice_query.

    Args:
        rag_system: Your initialized WemaRAGSystem
        chain: Your LangChain RAG chain (already created)
        spitch_api_key: Spitch API key

    Returns:
        Gradio Blocks interface (call .launch() on it)
    """

    assistant = WemaVoiceAssistant(rag_system, chain, spitch_api_key)

    def handle_text_query(text_input):
        """Handle text-only query; returns (response, None) to match the
        (text, audio) output pair expected by the click wiring below."""
        try:
            response = chain.invoke({"query": text_input})
            return response, None
        except Exception as e:
            return f"Error: {str(e)}", None

    def handle_voice_query(audio, input_lang, output_lang, voice):
        """Handle voice query end-to-end; returns (text, audio_path)."""
        if audio is None:
            return "Please provide audio input.", None

        # Display-name -> Spitch language-code mapping for the dropdowns
        LANGUAGES = {
            "English": "en",
            "Yoruba": "yo",
            "Igbo": "ig",
            "Hausa": "ha"
        }

        input_code = LANGUAGES.get(input_lang, "en")
        output_code = LANGUAGES.get(output_lang, "en")

        # Process voice query (never raises; returns error text on failure)
        text_response, audio_bytes = assistant.process_voice_query(
            audio,
            input_language=input_code,
            output_language=output_code,
            voice=voice
        )

        # Convert audio bytes to a file path because the output gr.Audio
        # component below is declared with type="filepath"
        audio_file_path = None
        if audio_bytes:
            audio_file_path = save_audio_to_temp_file(audio_bytes)

        return text_response, audio_file_path

    # Create tabbed interface
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🏦 Wema Bank AI Assistant")

        with gr.Tabs():
            # Text Tab
            with gr.Tab("💬 Text Chat"):
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Ask about Wema Bank products and services..."
                )
                text_submit = gr.Button("Send")
                text_output = gr.Textbox(label="Response", lines=10)

                # Hidden Audio component absorbs the unused second output
                text_submit.click(
                    fn=handle_text_query,
                    inputs=text_input,
                    outputs=[text_output, gr.Audio(visible=False)]
                )

            # Voice Tab
            with gr.Tab("🎤 Voice Chat"):
                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

                with gr.Row():
                    input_lang = gr.Dropdown(
                        ["English", "Yoruba", "Igbo", "Hausa"],
                        value="English",
                        label="Input Language"
                    )
                    output_lang = gr.Dropdown(
                        ["English", "Yoruba", "Igbo", "Hausa"],
                        value="English",
                        label="Output Language"
                    )
                    # NOTE(review): this voice list is not filtered per
                    # output language, unlike create_voice_gradio_interface;
                    # confirm these voices exist for all four languages.
                    voice = gr.Dropdown(
                        ["lina", "ada", "kofi"],
                        value="lina",
                        label="Voice"
                    )

                voice_submit = gr.Button("Ask")
                voice_text_output = gr.Textbox(label="Response Text", lines=8)
                voice_audio_output = gr.Audio(label="Audio Response", type="filepath")

                voice_submit.click(
                    fn=handle_voice_query,
                    inputs=[audio_input, input_lang, output_lang, voice],
                    outputs=[voice_text_output, voice_audio_output]
                )

    return demo
 
7
  https://colab.research.google.com/drive/17WecCovbP3TgYvHDyZ4Yckj77r2q5Nam
8
  """
9
 
 
 
 
 
10
 
11
  # Cell to add FIRST - Your Original WemaRAGSystem
12
  import json
 
1542
  )
1543
 
1544
  iface.launch(share=True, debug=True)