# ankitklakra's picture
# Update app.py
# be438a0 verified
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import json
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime
from gtts import gTTS
import tempfile
import requests
# --- CONFIGURATION ---
# Model identifiers handed to AutoModelForSeq2SeqLM.from_pretrained below.
MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"  # used for Kurukh -> Hindi
MODEL_H2K_REPO = "ankitklakra/hindi-to-kurukh"  # used for Hindi -> Kurukh
# Name of the spreadsheet that save_to_sheet opens to log feedback rows.
SHEET_NAME = "Kurukh_Feedback_Log"
print("Loading Translation Models...")
# Bind both pipelines to None up front so that a failed (or partially failed)
# load still leaves the names defined. Request handlers can then report the
# problem instead of crashing with a NameError on first use.
pipe_k2h = None
pipe_h2k = None
try:
    # Both translation directions share the same tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
    model_k2h = AutoModelForSeq2SeqLM.from_pretrained(MODEL_K2H_REPO)
    model_h2k = AutoModelForSeq2SeqLM.from_pretrained(MODEL_H2K_REPO)
    pipe_k2h = pipeline("text2text-generation", model=model_k2h, tokenizer=tokenizer)
    pipe_h2k = pipeline("text2text-generation", model=model_h2k, tokenizer=tokenizer)
except Exception as e:
    # Keep the app booting; whichever pipeline did not load stays None.
    print(f"Error loading translation models: {e}")
print("Loading Voice Model...")
# Default to "no speech recognition"; the name is only rebound when the
# Whisper pipeline is constructed successfully.
asr_pipeline = None
try:
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
    )
except Exception as load_error:
    print(f"Error loading whisper model: {load_error}")
# --- HELPER FUNCTIONS ---
def transliterate_to_hindi(text):
    """Transliterate Roman-script Hindi ("Hinglish") via Google Input Tools.

    This is a best-effort helper: on any failure (network error, timeout,
    HTTP error status, unexpected response shape) the input is returned
    unchanged so the caller can still proceed with the original text.

    Args:
        text: Text to transliterate.

    Returns:
        The first candidate returned by the service, or ``text`` itself when
        the input is empty or the lookup fails.
    """
    if not text:
        # Nothing to look up; skip the network round trip.
        return text
    try:
        response = requests.get(
            "https://inputtools.google.com/request",
            # Passing the query through ``params`` URL-encodes the user's
            # text, so characters such as '&', '#', '+' or '%' cannot
            # truncate or corrupt the query string.
            params={"text": text, "itc": "hi-t-i0-und", "num": 1},
            # Bound the wait so a stalled connection cannot hang the request.
            timeout=10,
        )
        response.raise_for_status()
        result = response.json()
        # The top suggestion is read from result[1][0][1][0].
        return result[1][0][1][0]
    except Exception:
        # Deliberate fallback. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return text
def save_to_sheet(original, translation, correction, direction):
    """Append one user-correction row to the ``SHEET_NAME`` spreadsheet.

    Args:
        original: Source text that was translated (required, non-blank).
        translation: The translation the model produced.
        correction: The user's corrected translation (required, non-blank).
        direction: Translation direction label stored with the row.

    Returns:
        A status string: a warning when a required field or the
        ``GOOGLE_CREDENTIALS`` environment variable is missing, a success
        message after the row is appended, or an error message carrying the
        exception text when anything in the Sheets workflow fails.
    """
    # Required-field validation happens before any credentials/network work.
    if not (original and original.strip()):
        return "⚠️ Error: Original text is missing."
    if not (correction and correction.strip()):
        return "⚠️ Error: Please enter your correction before submitting."

    header_row = [
        "Timestamp",
        "Direction",
        "Original Text",
        "AI Translation",
        "User Correction",
    ]
    api_scopes = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]

    try:
        raw_credentials = os.getenv("GOOGLE_CREDENTIALS")
        if not raw_credentials:
            return "⚠️ Error: Credentials missing."

        # The environment variable holds the service-account key as JSON text.
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            json.loads(raw_credentials), api_scopes
        )
        worksheet = gspread.authorize(credentials).open(SHEET_NAME).sheet1

        # A completely empty sheet gets the column headers first.
        if not worksheet.get_all_values():
            worksheet.append_row(header_row)

        feedback_row = [
            str(datetime.now()),
            direction,
            original,
            translation,
            correction,
        ]
        worksheet.append_row(feedback_row)
        return "✅ Saved to Google Sheets."
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
def speech_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` using ``asr_pipeline``.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio path was given or the speech pipeline is unavailable (None).
    """
    if audio_path is not None and asr_pipeline is not None:
        transcription = asr_pipeline(audio_path)
        return transcription["text"]
    return ""
def text_to_speech(text, language="hi"):
    """Synthesize ``text`` to an MP3 file with gTTS.

    Args:
        text: Text to speak. Falsy values produce no audio.
        language: Language code passed to gTTS as ``lang`` (default ``"hi"``).

    Returns:
        Path of the generated ``.mp3`` temporary file, or None when ``text``
        is empty or synthesis fails. The file is created with
        ``delete=False``, so it is not removed by this function on success.
    """
    if not text:
        return None

    audio_path = None
    try:
        tts = gTTS(text=text, lang=language)
        # Only a unique path is needed. Closing the handle immediately avoids
        # leaking a file descriptor per call and lets gTTS reopen the file by
        # name (an open handle blocks that on some platforms).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        tts.save(audio_path)
        return audio_path
    except Exception:
        # Best-effort: synthesis failures yield "no audio" rather than an
        # error. Remove the empty/partial file so failures do not pile up.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None
# --- MAIN TRANSLATION LOGIC ---
def process_translation(text, audio_input, direction, is_hinglish):
    """Translate typed or spoken input between Kurukh and Hindi.

    Args:
        text: Typed input; used only when no audio is supplied.
        audio_input: Path of a recorded audio file, or None. When present it
            is transcribed and takes priority over ``text``.
        direction: ``"Kurukh -> Hindi"`` selects the Kurukh-to-Hindi pipeline;
            any other value selects the Hindi-to-Kurukh pipeline.
        is_hinglish: When true and ``direction`` is ``"Hindi -> Kurukh"``, the
            input is transliterated before translation.

    Returns:
        A ``(source_text, translated_text, audio_path)`` tuple.
        ``source_text`` is the text actually fed to the model (after
        transcription/transliteration). ``audio_path`` is only produced for
        Kurukh -> Hindi output and may be None. For blank input the result is
        ``("", "", None)``. When translation fails, ``source_text`` is kept
        and ``translated_text`` carries the error message.
    """
    original_text = speech_to_text(audio_input) if audio_input else text
    # Treat whitespace-only input like empty input instead of sending it to
    # the model.
    if not original_text or not original_text.strip():
        return "", "", None

    if direction == "Hindi -> Kurukh" and is_hinglish:
        original_text = transliterate_to_hindi(original_text)

    try:
        # Looked up inside the try block so that a model that failed to load
        # is reported to the user rather than crashing the handler.
        target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k
        if target_pipeline is None:
            raise RuntimeError("Translation model is not available.")
        results = target_pipeline(
            original_text,
            max_length=128,
            num_beams=5,
            no_repeat_ngram_size=2,
            repetition_penalty=2.0,
            early_stopping=True,
        )
        translated_text = results[0]["generated_text"]
    except Exception as e:
        # The first element is written back to the input box by the UI, so
        # keep the user's text there and surface the error in the output slot.
        return original_text, f"❌ Error: {e}", None

    audio_output = None
    if direction == "Kurukh -> Hindi":
        audio_output = text_to_speech(translated_text, "hi")
    return original_text, translated_text, audio_output
# --- CSS ---
# Stylesheet markup rendered through gr.HTML: applies the Poppins font and
# styles the header banner classes used by the page header.
# Weight 700 is requested in the font import because .header-title declares
# font-weight: 700; without it that weight is never downloaded.
universal_css = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
body, button, input, select, textarea, .gradio-container {
font-family: 'Poppins', sans-serif !important;
}
.header-div {
text-align: center;
margin-bottom: 25px;
padding: 20px;
background: linear-gradient(to right, #f8f9fa, #e9ecef);
border-radius: 15px;
}
.header-title {
font-size: 2.2em;
font-weight: 700;
color: #2c3e50;
}
.header-subtitle {
font-size: 1.1em;
color: #576574;
}
</style>
"""
# --- UI ---
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been flattened; confirm that the examples section and
# the click handlers sit at the intended layout level.

# Static content is kept in named constants so the layout code stays readable.
_DIRECTION_CHOICES = ("Kurukh -> Hindi", "Hindi -> Kurukh")

_HEADER_HTML = """
<div class="header-div">
<h1 class="header-title">🇮🇳 AI Kurukh (Oraon) Translator</h1>
<p class="header-subtitle">
Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported
</p>
</div>
"""

_USAGE_MARKDOWN = """
1. Select translation mode.
2. Enable Hinglish if typing Hindi in English letters.
3. Use Voice input if needed.
"""

# Each row is [input text, translation mode, Hinglish flag].
_EXAMPLE_ROWS = [
    # 1. Kurukh (Devanagari Script)
    ["निघै नामे इन्द्रा हिकै?", "Kurukh -> Hindi", False],
    # 2. Hindi (Devanagari Script)
    ["तुम कहाँ जा रहे हो?", "Hindi -> Kurukh", False],
    # 3. Hinglish (Roman Script -> needs Transliteration)
    ["Tum kahan ho?", "Hindi -> Kurukh", True],
]

_FOOTER_HTML = (
    "<center style='color:#777;'>Built with ❤️ for the Kurukh Community</center>"
)

with gr.Blocks(title="Kurukh AI Translator") as demo:
    gr.HTML(universal_css)
    gr.HTML(_HEADER_HTML)

    with gr.Tabs():
        # --- Translator Tab ---
        with gr.TabItem("🗣️ Translator"):
            with gr.Accordion("ℹ️ How to use (Click to expand)", open=False):
                gr.Markdown(_USAGE_MARKDOWN)

            with gr.Row():
                # LEFT: inputs and the translate button
                with gr.Column():
                    direction = gr.Radio(
                        list(_DIRECTION_CHOICES),
                        label="Translation Mode",
                        value=_DIRECTION_CHOICES[0],
                    )
                    is_hinglish = gr.Checkbox(
                        label="🔤 Hinglish Typing (e.g., 'Tumhara')",
                        value=False,
                    )
                    input_text = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type sentences here...",
                        lines=4,
                    )
                    input_audio = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="🎙️ Voice Input (Hindi Only)",
                    )
                    translate_btn = gr.Button("Translate 🚀")

                # RIGHT: translation results
                with gr.Column():
                    output_text = gr.Textbox(
                        label="Translation",
                        lines=4,
                        interactive=False,
                    )
                    output_audio = gr.Audio(
                        label="🔊 Listen (Hindi Only)",
                        interactive=False,
                    )

            # --- EXAMPLES SECTION ---
            gr.Markdown("### 💡 Try these examples:")
            gr.Examples(
                examples=_EXAMPLE_ROWS,
                inputs=[input_text, direction, is_hinglish],
                label="Click on an example to load it:",
            )

            # The first output is the input box itself, so the text that was
            # actually processed is written back to it.
            translate_btn.click(
                fn=process_translation,
                inputs=[input_text, input_audio, direction, is_hinglish],
                outputs=[input_text, output_text, output_audio],
            )

        # --- Feedback Tab ---
        with gr.TabItem("📝 Improve the AI"):
            gr.Markdown("### 🛠️ Help us improve accuracy")
            fb_direction = gr.Radio(
                list(_DIRECTION_CHOICES),
                label="Direction",
                value=_DIRECTION_CHOICES[0],
            )
            fb_original = gr.Textbox(label="Original Text")
            fb_ai_output = gr.Textbox(label="AI's Translation")
            fb_user_correct = gr.Textbox(label="Correct Translation", lines=2)
            submit_btn = gr.Button("Submit Correction")
            status_lbl = gr.Label(label="Status")

            submit_btn.click(
                fn=save_to_sheet,
                inputs=[fb_original, fb_ai_output, fb_user_correct, fb_direction],
                outputs=status_lbl,
            )

    gr.Markdown("---")
    gr.HTML(_FOOTER_HTML)

demo.launch()