"""Gradio app: Kurukh <-> Hindi translation with voice input, TTS and feedback logging."""

import json
import os
import tempfile
from datetime import datetime

import gradio as gr
import gspread
import requests
from gtts import gTTS
from oauth2client.service_account import ServiceAccountCredentials
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# --- CONFIGURATION ---
MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"
MODEL_H2K_REPO = "ankitklakra/hindi-to-kurukh"
SHEET_NAME = "Kurukh_Feedback_Log"

# Translation-mode labels shared by the UI widgets and the translation logic.
DIRECTION_K2H = "Kurukh -> Hindi"
DIRECTION_H2K = "Hindi -> Kurukh"

TRANSLITERATION_URL = "https://inputtools.google.com/request"
REQUEST_TIMEOUT_SECONDS = 10

print("Loading Translation Models...")
try:
    tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
    model_k2h = AutoModelForSeq2SeqLM.from_pretrained(MODEL_K2H_REPO)
    model_h2k = AutoModelForSeq2SeqLM.from_pretrained(MODEL_H2K_REPO)
    pipe_k2h = pipeline("text2text-generation", model=model_k2h, tokenizer=tokenizer)
    pipe_h2k = pipeline("text2text-generation", model=model_h2k, tokenizer=tokenizer)
except Exception as e:
    print(f"Error loading translation models: {e}")
    # Bind the names anyway so process_translation can report the problem
    # instead of raising NameError on the first request.
    pipe_k2h = None
    pipe_h2k = None

print("Loading Voice Model...")
try:
    asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
except Exception as e:
    print(f"Error loading whisper model: {e}")
    asr_pipeline = None


# --- HELPER FUNCTIONS ---
def transliterate_to_hindi(text):
    """Transliterate Roman-script text via the Google Input Tools endpoint.

    Best effort: returns ``text`` unchanged when the request fails or the
    response does not have the expected nesting.
    """
    try:
        # `params=` URL-encodes the text, so characters such as '&' or '#'
        # cannot corrupt the query string; the timeout bounds a stalled call.
        response = requests.get(
            TRANSLITERATION_URL,
            params={"text": text, "itc": "hi-t-i0-und", "num": 1},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        result = response.json()
        return result[1][0][1][0]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        return text


def save_to_sheet(original, translation, correction, direction):
    """Append one feedback row to the first worksheet of ``SHEET_NAME``.

    Credentials are read as JSON from the ``GOOGLE_CREDENTIALS`` environment
    variable. A header row is written first when the sheet has no values.

    Returns:
        A status string for display; failures are reported in the string
        rather than raised.
    """
    # --- VALIDATION CHECK ---
    if not original or not original.strip():
        return "⚠️ Error: Original text is missing."
    if not correction or not correction.strip():
        return "⚠️ Error: Please enter your correction before submitting."

    try:
        json_creds = os.getenv("GOOGLE_CREDENTIALS")
        if not json_creds:
            return "⚠️ Error: Credentials missing."

        creds_dict = json.loads(json_creds)
        scope = [
            "https://spreadsheets.google.com/feeds",
            "https://www.googleapis.com/auth/drive",
        ]
        creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
        client = gspread.authorize(creds)
        sheet = client.open(SHEET_NAME).sheet1

        if not sheet.get_all_values():
            sheet.append_row(
                [
                    "Timestamp",
                    "Direction",
                    "Original Text",
                    "AI Translation",
                    "User Correction",
                ]
            )

        sheet.append_row(
            [str(datetime.now()), direction, original, translation, correction]
        )
        return "✅ Saved to Google Sheets."
    except Exception as e:
        return f"❌ Error: {str(e)}"


def speech_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` with the ASR pipeline.

    Returns an empty string when no audio was given or the voice model
    failed to load.
    """
    if audio_path is None or asr_pipeline is None:
        return ""
    return asr_pipeline(audio_path)["text"]


def text_to_speech(text, language="hi"):
    """Synthesize ``text`` with gTTS and return the path of the saved MP3.

    Returns ``None`` for empty text or when synthesis fails. The file is
    created with ``delete=False``, so it is not removed automatically.
    """
    if not text:
        return None

    audio_path = None
    try:
        tts = gTTS(text=text, lang=language)
        # Only the unique path is needed: close the handle before gTTS writes
        # to it so no descriptor leaks and the file is not open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Error generating speech: {e}")
        # Do not leave an empty or partial file behind after a failure.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None


# --- MAIN TRANSLATION LOGIC ---
def process_translation(text, audio_input, direction, is_hinglish):
    """Translate typed or spoken input in the selected direction.

    Args:
        text: Typed input; ignored when ``audio_input`` is provided.
        audio_input: Path of a recorded audio file, or ``None``.
        direction: ``DIRECTION_K2H`` or ``DIRECTION_H2K``.
        is_hinglish: Transliterate Roman-script input first (only applied
            for ``DIRECTION_H2K``).

    Returns:
        ``(input_text, translation_or_error, audio_path_or_None)``, matching
        the three output components wired to the Translate button.
    """
    original_text = speech_to_text(audio_input) if audio_input else text
    original_text = (original_text or "").strip()
    if not original_text:
        return "", "", None

    if direction == DIRECTION_H2K and is_hinglish:
        original_text = transliterate_to_hindi(original_text)

    target_pipeline = pipe_k2h if direction == DIRECTION_K2H else pipe_h2k
    if target_pipeline is None:
        return original_text, "❌ Error: Translation model is not available.", None

    try:
        results = target_pipeline(
            original_text,
            max_length=128,
            num_beams=5,
            no_repeat_ngram_size=2,
            repetition_penalty=2.0,
            early_stopping=True,
        )
        translated_text = results[0]["generated_text"]
    except Exception as e:
        # Show the failure in the translation box and keep the user's input,
        # instead of overwriting the input box with the exception text.
        return original_text, f"❌ Error: {e}", None

    audio_output = None
    if direction == DIRECTION_K2H:
        audio_output = text_to_speech(translated_text, "hi")

    return original_text, translated_text, audio_output


# --- CSS ---
universal_css = """ """

# Header text shown at the top of the page.
HEADER_HTML = (
    "\n\n🇮🇳 AI Kurukh (Oraon) Translator\n\n"
    "Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported\n\n"
)

# One step per line so Markdown renders a three-item numbered list.
HOW_TO_USE_MD = (
    "1. Select translation mode.\n"
    "2. Enable Hinglish if typing Hindi in English letters.\n"
    "3. Use Voice input if needed.\n"
)

# --- UI ---
with gr.Blocks(title="Kurukh AI Translator") as demo:
    gr.HTML(universal_css)
    gr.HTML(HEADER_HTML)

    with gr.Tabs():
        # --- Translator Tab ---
        with gr.TabItem("🗣️ Translator"):
            with gr.Accordion("ℹ️ How to use (Click to expand)", open=False):
                gr.Markdown(HOW_TO_USE_MD)

            with gr.Row():
                # LEFT
                with gr.Column():
                    direction = gr.Radio(
                        [DIRECTION_K2H, DIRECTION_H2K],
                        label="Translation Mode",
                        value=DIRECTION_K2H,
                    )
                    is_hinglish = gr.Checkbox(
                        label="🔤 Hinglish Typing (e.g., 'Tumhara')", value=False
                    )
                    input_text = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type sentences here...",
                        lines=4,
                    )
                    input_audio = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="🎙️ Voice Input (Hindi Only)",
                    )
                    translate_btn = gr.Button("Translate 🚀")

                # RIGHT
                with gr.Column():
                    output_text = gr.Textbox(
                        label="Translation", lines=4, interactive=False
                    )
                    output_audio = gr.Audio(
                        label="🔊 Listen (Hindi Only)", interactive=False
                    )

            # --- EXAMPLES SECTION ---
            gr.Markdown("### 💡 Try these examples:")
            gr.Examples(
                examples=[
                    # 1. Kurukh (Devanagari Script)
                    ["निघै नामे इन्द्रा हिकै?", DIRECTION_K2H, False],
                    # 2. Hindi (Devanagari Script)
                    ["तुम कहाँ जा रहे हो?", DIRECTION_H2K, False],
                    # 3. Hinglish (Roman Script -> needs Transliteration)
                    ["Tum kahan ho?", DIRECTION_H2K, True],
                ],
                inputs=[input_text, direction, is_hinglish],
                label="Click on an example to load it:",
            )

            # The first output is the input box: it is updated with the text
            # that was actually translated (transcript or transliteration).
            translate_btn.click(
                fn=process_translation,
                inputs=[input_text, input_audio, direction, is_hinglish],
                outputs=[input_text, output_text, output_audio],
            )

        # --- Feedback Tab ---
        with gr.TabItem("📝 Improve the AI"):
            gr.Markdown("### 🛠️ Help us improve accuracy")
            fb_direction = gr.Radio(
                [DIRECTION_K2H, DIRECTION_H2K],
                label="Direction",
                value=DIRECTION_K2H,
            )
            fb_original = gr.Textbox(label="Original Text")
            fb_ai_output = gr.Textbox(label="AI's Translation")
            fb_user_correct = gr.Textbox(label="Correct Translation", lines=2)
            submit_btn = gr.Button("Submit Correction")
            status_lbl = gr.Label(label="Status")

            submit_btn.click(
                fn=save_to_sheet,
                inputs=[fb_original, fb_ai_output, fb_user_correct, fb_direction],
                outputs=status_lbl,
            )

    gr.Markdown("---")
    gr.HTML("\nBuilt with ❤️ for the Kurukh Community\n")

demo.launch()