mayowaibi committed on
Commit
7fb8c74
·
1 Parent(s): 033a988

Initial transfer from old repo

Browse files
Files changed (2) hide show
  1. app.py +175 -0
  2. requirements.txt +17 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import librosa
4
+ import os
5
+ import base64
6
+ import tempfile
7
+
8
+ from transformers import pipeline
9
+ from huggingface_hub import login
10
+ from google.cloud import translate_v3
11
+ # from livekit import api
12
+
13
+
14
+ # ===========================
15
+ # INITIAL SETUP
16
+ # ===========================
17
+
18
# Authenticate with the Hugging Face Hub when a token is configured.
# The token is read from the HUGGINGFACE_TOKEN environment variable; without
# it the app only warns and carries on unauthenticated.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if not hf_token:
    print("⚠️ No Hugging Face token found. You cannot access private models.")
else:
    login(token=hf_token)
24
+
25
# Load and decode Google credentials.
# GOOGLE_APPLICATION_CREDENTIALS_JSON holds the service-account JSON encoded
# as base64. It is decoded, written to a temporary .json file, and the path is
# exported through GOOGLE_APPLICATION_CREDENTIALS.
creds_b64 = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
if creds_b64:
    # Decoding to str validates that the payload is well-formed UTF-8.
    creds_json = base64.b64decode(creds_b64).decode("utf-8")
    # delete=False keeps the file on disk after the handle is closed; the
    # `with` block guarantees the data is flushed and the handle released
    # before anything else opens the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
        temp_file.write(creds_json.encode("utf-8"))
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_file.name
33
+
34
+ # LiveKit
35
+ # def create_livekit_token(user_name="guest"):
36
+ # LIVEKIT_URL = os.getenv("LIVEKIT_URL")
37
+ # LIVEKIT_API_KEY = os.getenv("LIVEKIT_API_KEY")
38
+ # LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET")
39
+
40
+ # token = api.AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET)
41
+ # token.identity = user_name
42
+ # token.add_grant(api.VideoGrant(room="yoruba-demo-room"))
43
+ # jwt = token.to_jwt()
44
+
45
+ # return {"url": LIVEKIT_URL, "token": jwt}
46
+
47
+
48
# Google Cloud project ID.
# Must be the bare project ID with no path prefix: it is interpolated into the
# resource name "projects/{PROJECT_ID}/locations/global", so a value that
# contains "/" yields a malformed parent path.
PROJECT_ID = "oluko-475823"
50
+
51
+
52
+ # ===========================
53
+ # LOAD ASR MODEL
54
+ # ===========================
55
# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
asr = pipeline(
    task="automatic-speech-recognition",
    model="NCAIR1/Yoruba-ASR",
)
print("✅ Done loading models!\n")
57
+
58
+
59
+ # ===========================
60
+ # TRANSLATION FUNCTION
61
+ # ===========================
62
def translate_text(
    text: str,
    source_language_code: str = "yo",
    target_language_code: str = "en-US",
) -> str:
    """Translate text with the Google Cloud Translation v3 API.

    Args:
        text: The text to translate.
        source_language_code: Language code of ``text`` (defaults to Yoruba).
        target_language_code: Language code to translate into (defaults to
            US English).

    Returns:
        The translated text of the first result, or an empty string when
        ``text`` is empty/whitespace-only or the service returns no
        translations.
    """
    # This function is wired to a textbox "change" event, so it is also called
    # when the box is cleared. Skip the remote call for blank input.
    if not text or not text.strip():
        return ""

    client = translate_v3.TranslationServiceClient()
    parent = f"projects/{PROJECT_ID}/locations/global"

    response = client.translate_text(
        contents=[text],
        parent=parent,
        mime_type="text/plain",
        source_language_code=source_language_code,
        target_language_code=target_language_code,
    )

    # One input string was sent, so at most one translation is expected;
    # guard against an empty result instead of raising IndexError.
    translations = response.translations
    if not translations:
        return ""
    return translations[0].translated_text
81
+
82
+
83
+ # ===========================
84
+ # ASR PROCESSING FUNCTION
85
+ # ===========================
86
def process_audio(file_path):
    """Convert speech to Yoruba text using the ASR pipeline.

    This function is registered as the handler for the audio input's change
    event, so it has to exist at module level when the UI is built.

    Args:
        file_path: Path to the recorded/uploaded audio file. May be empty or
            ``None`` when the audio input has no value.

    Returns:
        The transcription text, or the placeholder ``"..."`` when no audio
        file was provided.
    """
    if not file_path:
        return "..."

    # Resample to 16 kHz while loading; the raw waveform is handed to the
    # pipeline without an explicit sampling rate.
    audio, sr = librosa.load(file_path, sr=16000)
    result = asr(audio)
    transcription = result.get("text", "")
    return transcription
95
+
96
+
97
+ # ===========================
98
+ # GRADIO INTERFACE
99
+ # ===========================
100
# Build the Gradio UI: a page titled "Yoruba AI Platform" with two tabs.
# The resulting Blocks object is bound to `app` and launched at the bottom of
# the file.
with gr.Blocks(title="Yoruba AI Platform", theme=gr.themes.Glass()) as app:
    gr.Markdown("# 🇳🇬 Olùkọ́")
    gr.Markdown(
        "Use this app to practice your Yoruba speaking."
    )

    # --- Tab 1: ASR + Translator ---
    with gr.Tab("🎧 Yoruba Speech-to-Text + Translator"):
        gr.Markdown("### Talk, we'll listen!")
        # type="filepath" means the handler receives a path to the audio file.
        audio_input = gr.Audio(type="filepath", label="🎙️ Speak Yoruba")
        # interactive=True lets the user edit the transcription by hand.
        output_transcription = gr.Textbox(
            label="✍️ Transcription (Yoruba)", interactive=True
        )
        output_translation = gr.Textbox(label="💬 Translation (English)")

        # Link actions
        # Audio change -> transcription textbox.
        # NOTE(review): `process_audio` must be defined at module level before
        # this line runs, otherwise building the UI raises NameError — verify.
        audio_input.change(
            process_audio,
            inputs=audio_input,
            outputs=output_transcription,
        )
        # Transcription change -> translation textbox. Only the text is passed,
        # so translate_text runs with its default language codes.
        output_transcription.change(
            translate_text,
            inputs=output_transcription,
            outputs=output_translation,
        )

    # --- Tab 2: LiveKit Conversational Agent (placeholder) ---
    with gr.Tab("🗨️ Yoruba Conversationalist"):
        gr.Markdown(
            "### Real-time Yoruba Conversation\n"
            "This feature connects to a LiveKit-powered conversational AI that lets you practice Yoruba speaking in real time."
        )
        # The embedded page script fetches '/create_token', expects JSON with
        # `url` and `token` fields, connects a LivekitClient.Room, publishes
        # the local audio track and attaches subscribed tracks to #video-area.
        # NOTE(review): the script relies on a global `LivekitClient`; confirm
        # the <script src> below actually loads the LiveKit client bundle.
        # NOTE(review): confirm a '/create_token' route is registered on the
        # server, and that scripts embedded via gr.HTML are executed by the
        # Gradio version in use.
        gr.HTML(
            """
            <script src="https://google.com"></script>
            <h3>🎙️ Start a Yoruba conversation</h3>
            <button id="join-btn">Join Live Session</button>
            <div id="video-area"></div>
            <script>
            async function joinSession() {
            const res = await fetch('/create_token');
            const data = await res.json();

            const room = new LivekitClient.Room();
            await room.connect(data.url, data.token);

            const localTracks = await LivekitClient.createLocalTracks({ audio: true });
            await room.localParticipant.publishTracks(localTracks);

            room.on('trackSubscribed', (track) => {
            const el = track.attach();
            document.getElementById('video-area').appendChild(el);
            });
            }

            document.getElementById('join-btn').onclick = joinSession;
            </script>
            """
        )
160
+
161
+
162
+ # ===========================
163
+ # LIVEKIT TOKEN ROUTE
164
+ # ===========================
165
+ # @app.get("/create_token")
166
+ # def serve_token(request):
167
+ # name = request.query_params.get("name", "guest")
168
+ # return create_livekit_token(name)
169
+
170
+
171
+ # ===========================
172
+ # APP LAUNCH
173
+ # ===========================
174
if __name__ == "__main__":
    # Start the web server only when run as a script: listen on all network
    # interfaces, port 7860.
    app.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ librosa
3
+ torch
4
+ transformers
5
+ huggingface-hub
6
+ google-cloud-speech
7
+ google-cloud-translate
8
+ google-cloud-texttospeech
9
+ requests
10
+ livekit
11
+ livekit-agents
12
+ livekit-plugins-openai
13
+ livekit-plugins-google
14
+ livekit-plugins-silero
15
+ livekit-plugins-spitch
16
+ livekit-plugins-turn-detector
17
+ livekit-plugins-noise-cancellation