Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
import os
|
| 4 |
import json
|
|
@@ -7,7 +7,7 @@ from oauth2client.service_account import ServiceAccountCredentials
|
|
| 7 |
from datetime import datetime
|
| 8 |
from gtts import gTTS
|
| 9 |
import tempfile
|
| 10 |
-
import requests
|
| 11 |
|
| 12 |
# --- CONFIGURATION ---
|
| 13 |
MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"
|
|
@@ -27,17 +27,14 @@ print("Loading Voice Model...")
|
|
| 27 |
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
|
| 28 |
|
| 29 |
# --- HELPER FUNCTIONS ---
|
| 30 |
-
|
| 31 |
def transliterate_to_hindi(text):
|
| 32 |
-
|
| 33 |
try:
|
| 34 |
url = "https://inputtools.google.com/request?text={}&itc=hi-t-i0-und&num=1"
|
| 35 |
response = requests.get(url.format(text))
|
| 36 |
result = response.json()
|
| 37 |
-
# The API returns a nested list; we grab the first suggestion
|
| 38 |
return result[1][0][1][0]
|
| 39 |
except:
|
| 40 |
-
return text
|
| 41 |
|
| 42 |
def save_to_sheet(original, translation, correction, direction):
|
| 43 |
try:
|
|
@@ -70,77 +67,199 @@ def text_to_speech(text, language="hi"):
|
|
| 70 |
|
| 71 |
# --- MAIN TRANSLATION LOGIC ---
|
| 72 |
def process_translation(text, audio_input, direction, is_hinglish):
|
| 73 |
-
|
| 74 |
-
# 1. Get Text from Voice or Typing
|
| 75 |
original_text = speech_to_text(audio_input) if audio_input else text
|
| 76 |
if not original_text: return "", "", None
|
| 77 |
|
| 78 |
-
# 2. Handle Hinglish
|
| 79 |
if direction == "Hindi -> Kurukh" and is_hinglish:
|
| 80 |
original_text = transliterate_to_hindi(original_text)
|
| 81 |
|
| 82 |
-
# 3. Translate
|
| 83 |
target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k
|
| 84 |
try:
|
| 85 |
-
|
| 86 |
results = target_pipeline(
|
| 87 |
-
original_text,
|
| 88 |
-
max_length=128,
|
| 89 |
-
num_beams=5,
|
| 90 |
-
no_repeat_ngram_size=2,
|
| 91 |
-
repetition_penalty=2.0,
|
| 92 |
early_stopping=True
|
| 93 |
)
|
| 94 |
translated_text = results[0]['generated_text']
|
| 95 |
except Exception as e:
|
| 96 |
return str(e), "", None
|
| 97 |
|
| 98 |
-
# 4. Audio Output (For Hindi)
|
| 99 |
audio_output = None
|
| 100 |
if direction == "Kurukh -> Hindi":
|
| 101 |
audio_output = text_to_speech(translated_text, "hi")
|
| 102 |
|
| 103 |
return original_text, translated_text, audio_output
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
# --- THE UI ---
|
| 106 |
-
with gr.Blocks() as demo:
|
| 107 |
-
gr.Markdown("# ๐ฎ๐ณ AI Kurukh (Kurux) Translator")
|
| 108 |
-
gr.Markdown("### Voice & Hinglish Supported")
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
with gr.Tabs():
|
|
|
|
|
|
|
| 111 |
with gr.TabItem("๐ฃ๏ธ Translator"):
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
with gr.Row():
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
translate_btn.click(
|
| 129 |
fn=process_translation,
|
| 130 |
inputs=[input_text, input_audio, direction, is_hinglish],
|
| 131 |
outputs=[input_text, output_text, output_audio]
|
| 132 |
)
|
| 133 |
|
|
|
|
| 134 |
with gr.TabItem("๐ Improve the AI"):
|
| 135 |
-
gr.
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
demo.launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
import os
|
| 4 |
import json
|
|
|
|
| 7 |
from datetime import datetime
|
| 8 |
from gtts import gTTS
|
| 9 |
import tempfile
|
| 10 |
+
import requests
|
| 11 |
|
| 12 |
# --- CONFIGURATION ---
|
| 13 |
MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"
|
|
|
|
| 27 |
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
|
| 28 |
|
| 29 |
# --- HELPER FUNCTIONS ---
|
|
|
|
| 30 |
def transliterate_to_hindi(text):
    """Transliterate Roman-script Hindi ("Hinglish") using Google Input Tools.

    The lookup is best-effort: if the request fails or the response does not
    have the expected nested-list layout, the input is returned unchanged so
    the caller can still attempt a translation.

    Args:
        text: The text to transliterate.

    Returns:
        The first suggestion from the service, or ``text`` unchanged when it
        is empty/blank or the lookup fails.
    """
    # Nothing to transliterate; skip the network round trip entirely.
    if not text or not text.strip():
        return text

    try:
        response = requests.get(
            "https://inputtools.google.com/request",
            # Passing params lets requests percent-encode the text, so input
            # containing '&', '#' or '+' can no longer corrupt the query.
            params={"text": text, "itc": "hi-t-i0-und", "num": 1},
            # Without a timeout a stalled connection would block the request
            # indefinitely.
            timeout=10,
        )
        response.raise_for_status()
        result = response.json()
        # The service answers with a nested list; take the first suggestion.
        return result[1][0][1][0]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network/HTTP error, invalid JSON, or an unexpected response shape.
        return text
|
| 38 |
|
| 39 |
def save_to_sheet(original, translation, correction, direction):
|
| 40 |
try:
|
|
|
|
| 67 |
|
| 68 |
# --- MAIN TRANSLATION LOGIC ---
|
| 69 |
def process_translation(text, audio_input, direction, is_hinglish):
    """Translate typed or spoken input between Kurukh and Hindi.

    Args:
        text: Typed input; used only when ``audio_input`` is falsy.
        audio_input: Recorded audio handed to ``speech_to_text``; takes
            precedence over ``text`` when provided.
        direction: ``"Kurukh -> Hindi"`` selects ``pipe_k2h``; any other value
            selects ``pipe_h2k``.
        is_hinglish: When true and the direction is ``"Hindi -> Kurukh"``, the
            input is first passed through ``transliterate_to_hindi``.

    Returns:
        A 3-tuple ``(source_text, translated_text, audio)``. The UI wires
        these to the input box, the output box, and the audio player, in that
        order. ``audio`` is the result of ``text_to_speech`` for
        Kurukh -> Hindi and ``None`` otherwise. Blank input yields
        ``("", "", None)``. If the model call fails, the source text is kept
        and the failure message is placed in the translation slot.
    """
    # Voice input wins over typed text when both are present.
    original_text = speech_to_text(audio_input) if audio_input else text

    # Empty or whitespace-only input has nothing to translate.
    if not original_text or (
        isinstance(original_text, str) and not original_text.strip()
    ):
        return "", "", None

    # Hinglish only applies to Hindi input typed in Roman letters.
    if direction == "Hindi -> Kurukh" and is_hinglish:
        original_text = transliterate_to_hindi(original_text)

    target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k
    try:
        results = target_pipeline(
            original_text,
            max_length=128,
            num_beams=5,
            no_repeat_ngram_size=2,
            repetition_penalty=2.0,
            early_stopping=True,
        )
        translated_text = results[0]['generated_text']
    except Exception as error:
        # The first slot feeds the input textbox: keep what the user entered
        # and report the failure in the translation slot instead of
        # overwriting their text with the exception message.
        return original_text, f"Translation failed: {error}", None

    # Speech output is produced for Hindi results only.
    audio_output = None
    if direction == "Kurukh -> Hindi":
        audio_output = text_to_speech(translated_text, "hi")

    return original_text, translated_text, audio_output
|
| 95 |
|
| 96 |
+
# --- CUSTOM CSS ---

# Stylesheet passed to gr.Blocks(css=...). The class names and the
# #translate-btn id are attached to components below through
# elem_classes / elem_id.
custom_css = """
/* Import a nice font (Poppins) */
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap');

/* Apply font to everything */
body, button, input, select, textarea, .gradio-container {
    font-family: 'Poppins', sans-serif !important;
}

/* Header Styling */
.header-title {
    text-align: center;
    color: #2c3e50;
    margin-bottom: 0.5rem;
    font-size: 2.2em;
    font-weight: 600;
}
.header-subtitle {
    text-align: center;
    color: #7f8c8d;
    margin-bottom: 2rem;
    font-weight: 300;
    font-size: 1.1em;
}

/* Card Container Styling */
.input-container, .output-container {
    background: #ffffff;
    border: 1px solid #e0e0e0;
    border-radius: 12px;
    padding: 25px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.05);
    margin-bottom: 20px;
}

/* Button Styling */
#translate-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border: none;
    color: white;
    font-weight: 600;
    font-size: 1.1em;
    padding: 10px 20px;
    border-radius: 8px;
    transition: transform 0.1s;
}
#translate-btn:hover {
    transform: scale(1.02);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}

/* Feedback Section */
.feedback-box {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 20px;
    border-left: 5px solid #ffc107;
}
"""

# --- THE UI ---
# NOTE(review): this copy of the file had lost its indentation and had its
# emoji mis-encoded. Nesting below is reconstructed from the `with`
# statements, and emoji are restored where the surviving bytes plus context
# identify them. Three labels whose glyph could not be recovered are left
# untouched and flagged inline -- confirm against the original file.
with gr.Blocks(css=custom_css, title="Kurukh AI Translator") as demo:

    # HEADER SECTION
    with gr.Row():
        with gr.Column():
            gr.Markdown("# 🇮🇳 AI Kurukh (Oraon) Translator", elem_classes=["header-title"])
            gr.Markdown("### Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported", elem_classes=["header-subtitle"])

    # TABS FOR NAVIGATION
    with gr.Tabs():

        # --- TAB 1: TRANSLATOR INTERFACE ---
        with gr.TabItem("🗣️ Translator"):

            # Help / Instructions (Collapsible)
            with gr.Accordion("ℹ️ How to use (Click to expand)", open=False):
                gr.Markdown("""
                1. **Select Mode:** Choose translation direction (Kurukh -> Hindi OR Hindi -> Kurukh).
                2. **Hinglish Support:** Check the box if you are typing Hindi words using English letters (e.g., 'Tumhara naam kya hai').
                3. **Input Methods:** Type text in the box OR click the microphone to speak (Hindi audio supported).
                4. **Results:** View the translation and listen to the audio output (for Hindi results).
                """)

            # Main Grid Layout
            with gr.Row():
                # LEFT COLUMN (INPUT CARD)
                with gr.Column(elem_classes=["input-container"]):
                    gr.Markdown("### 📥 Input Source")

                    with gr.Row():
                        direction = gr.Radio(
                            ["Kurukh -> Hindi", "Hindi -> Kurukh"],
                            label="Translation Mode",
                            value="Kurukh -> Hindi"
                        )

                    # NOTE(review): assumed to sit below the radio row rather
                    # than inside it -- confirm.
                    is_hinglish = gr.Checkbox(label="🔤 Hinglish Typing (e.g. 'Tumhara')", value=False)

                    input_text = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type sentences here...",
                        lines=4,
                        show_label=False
                    )

                    input_audio = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="🎙️ Voice Input (Hindi Only)"
                    )

                    # NOTE(review): trailing glyph is mis-encoded and not
                    # recoverable from this copy -- restore the intended emoji.
                    translate_btn = gr.Button("Translate ๐", variant="primary", elem_id="translate-btn")

                # RIGHT COLUMN (OUTPUT CARD)
                with gr.Column(elem_classes=["output-container"]):
                    gr.Markdown("### 📤 Translation Result")

                    output_text = gr.Textbox(
                        label="Translation",
                        lines=4,
                        show_label=False,
                        interactive=False,
                        show_copy_button=True
                    )

                    # NOTE(review): leading glyph is mis-encoded and not
                    # recoverable from this copy -- restore the intended emoji.
                    output_audio = gr.Audio(
                        label="๐ Listen (Hindi Only)",
                        interactive=False,
                        autoplay=False
                    )

            # Connect Logic
            # The first output is the input box itself, so the user sees the
            # text that was actually translated (speech transcript or
            # transliterated Hinglish).
            translate_btn.click(
                fn=process_translation,
                inputs=[input_text, input_audio, direction, is_hinglish],
                outputs=[input_text, output_text, output_audio]
            )

        # --- TAB 2: FEEDBACK INTERFACE ---
        # NOTE(review): leading glyph is mis-encoded and not recoverable from
        # this copy -- restore the intended emoji.
        with gr.TabItem("๐ Improve the AI"):
            with gr.Column(elem_classes=["feedback-box"]):
                gr.Markdown("### 🛠️ Help us improve accuracy")
                gr.Markdown("Our AI is still learning! If you spot a mistake, please submit the correct translation below. This helps us train better versions.")

                with gr.Row():
                    fb_direction = gr.Radio(["Kurukh -> Hindi", "Hindi -> Kurukh"], label="Direction", value="Kurukh -> Hindi")

                with gr.Row():
                    fb_original = gr.Textbox(label="Original Text", placeholder="The sentence you tried to translate")
                    fb_ai_output = gr.Textbox(label="AI's Wrong Translation (Optional)")

                fb_user_correct = gr.Textbox(label="✅ Your Correct Translation", lines=2, placeholder="Type the correct Kurukh/Hindi translation here")

                submit_btn = gr.Button("Submit Correction to Database", variant="secondary")
                status_lbl = gr.Label(label="Status")

                # Argument order matches
                # save_to_sheet(original, translation, correction, direction).
                submit_btn.click(
                    fn=save_to_sheet,
                    inputs=[fb_original, fb_ai_output, fb_user_correct, fb_direction],
                    outputs=status_lbl
                )

    # FOOTER
    gr.Markdown("---")
    gr.Markdown("<center>Built with ❤️ for the Kurukh Community • Powered by Google mT5 & OpenAI Whisper</center>")

demo.launch()
|