Spaces:
Running
Running
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
| import os | |
| import json | |
| import gspread | |
| from oauth2client.service_account import ServiceAccountCredentials | |
| from datetime import datetime | |
| from gtts import gTTS | |
| import tempfile | |
| import requests | |
# --- CONFIGURATION ---
# Model repository ids handed to ``from_pretrained`` below, one per
# translation direction.
MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"
MODEL_H2K_REPO = "ankitklakra/hindi-to-kurukh"
# Spreadsheet title opened by ``save_to_sheet`` to log user corrections.
SHEET_NAME = "Kurukh_Feedback_Log"

# Bind both pipeline names up front so they always exist. If loading fails,
# request handlers see ``None`` (the same convention ``asr_pipeline`` uses)
# instead of crashing with a NameError on the first translation.
pipe_k2h = None
pipe_h2k = None

print("Loading Translation Models...")
try:
    # Both directions share the tokenizer loaded from "google/mt5-small".
    tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
    # Build each pipeline as soon as its model is loaded, so a failure while
    # loading the second model does not also disable the first direction.
    model_k2h = AutoModelForSeq2SeqLM.from_pretrained(MODEL_K2H_REPO)
    pipe_k2h = pipeline("text2text-generation", model=model_k2h, tokenizer=tokenizer)
    model_h2k = AutoModelForSeq2SeqLM.from_pretrained(MODEL_H2K_REPO)
    pipe_h2k = pipeline("text2text-generation", model=model_h2k, tokenizer=tokenizer)
except Exception as e:
    # Startup boundary: report and keep the app alive with whatever loaded.
    print(f"Error loading translation models: {e}")

print("Loading Voice Model...")
try:
    asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
except Exception as e:
    print(f"Error loading whisper model: {e}")
    # ``speech_to_text`` checks for None and returns "" when ASR is unavailable.
    asr_pipeline = None
| # --- HELPER FUNCTIONS --- | |
def transliterate_to_hindi(text):
    """Transliterate Roman-script ("Hinglish") text via Google Input Tools.

    Sends *text* to the ``inputtools.google.com`` request endpoint with the
    ``hi-t-i0-und`` input-tool code and returns ``result[1][0][1][0]`` from
    the JSON reply (presumably the first suggestion, since ``num=1`` is
    requested).

    This is best-effort: on a network error, a non-2xx status, a timeout, or
    an unexpected response shape, *text* is returned unchanged so the caller
    can still attempt a translation. Empty input is returned as-is without
    a network call.
    """
    if not text:
        return text

    url = "https://inputtools.google.com/request"
    # Let requests build the query string so characters such as "&", "#",
    # "+" or spaces in user input are percent-encoded instead of corrupting
    # the URL.
    params = {"text": text, "itc": "hi-t-i0-und", "num": 1}
    try:
        # A timeout keeps a stalled remote service from hanging the request.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        result = response.json()
        return result[1][0][1][0]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # ValueError: body is not JSON. LookupError/TypeError: JSON does not
        # have the nested-list shape indexed above.
        return text
def save_to_sheet(original, translation, correction, direction):
    """Append a user-submitted correction to the feedback spreadsheet.

    Both *original* and *correction* must contain non-whitespace text.
    Service-account credentials are read as JSON from the
    ``GOOGLE_CREDENTIALS`` environment variable. A header row is written
    first when the sheet has no values yet.

    Always returns a status string for display; exceptions raised while
    talking to Google Sheets are reported in that string, not propagated.
    """
    # --- VALIDATION CHECK ---
    if not (original and original.strip()):
        return "⚠️ Error: Original text is missing."
    if not (correction and correction.strip()):
        return "⚠️ Error: Please enter your correction before submitting."

    header_row = [
        "Timestamp",
        "Direction",
        "Original Text",
        "AI Translation",
        "User Correction",
    ]
    api_scopes = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]

    try:
        raw_credentials = os.getenv("GOOGLE_CREDENTIALS")
        if not raw_credentials:
            return "⚠️ Error: Credentials missing."

        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            json.loads(raw_credentials), api_scopes
        )
        worksheet = gspread.authorize(credentials).open(SHEET_NAME).sheet1

        # An empty sheet gets its column headers before the first data row.
        if not worksheet.get_all_values():
            worksheet.append_row(header_row)

        feedback_row = [
            str(datetime.now()),
            direction,
            original,
            translation,
            correction,
        ]
        worksheet.append_row(feedback_row)
        return "✅ Saved to Google Sheets."
    except Exception as e:
        return f"❌ Error: {str(e)}"
def speech_to_text(audio_path):
    """Transcribe the audio file at *audio_path* with the ASR pipeline.

    Returns the pipeline's ``"text"`` field, or ``""`` when no audio was
    supplied or the ASR model is unavailable (``asr_pipeline`` is None).
    """
    if audio_path is not None and asr_pipeline is not None:
        transcription = asr_pipeline(audio_path)
        return transcription["text"]
    return ""
def text_to_speech(text, language="hi"):
    """Synthesize *text* to an MP3 file with gTTS and return the file path.

    Args:
        text: Text to speak. Empty or None yields None without any work.
        language: Language code passed to gTTS as ``lang`` (default "hi").

    Returns:
        Path of a newly created ``.mp3`` temporary file, or None when *text*
        is empty or synthesis fails. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    if not text:
        return None
    try:
        # Build the gTTS object first so an invalid request fails before a
        # temporary file is created.
        tts = gTTS(text=text, lang=language)
        # Reserve a unique path, then close the handle immediately: the
        # original left it open (leaking a descriptor per call) while gTTS
        # wrote to the same path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        try:
            tts.save(audio_path)
        except Exception:
            # Do not leave an empty or partial file behind on failure.
            os.remove(audio_path)
            raise
        return audio_path
    except Exception as e:
        # Audio is optional for the caller, so report and degrade to None
        # rather than failing the whole translation.
        print(f"Error generating speech: {e}")
        return None
| # --- MAIN TRANSLATION LOGIC --- | |
def process_translation(text, audio_input, direction, is_hinglish):
    """Translate typed or spoken input in the selected direction.

    Args:
        text: Text typed by the user (may be empty or None).
        audio_input: Path to a recorded audio file, or None. When present it
            is transcribed and takes priority over *text*; if transcription
            yields nothing, *text* is used instead.
        direction: "Kurukh -> Hindi" or "Hindi -> Kurukh".
        is_hinglish: When true and the direction is "Hindi -> Kurukh", the
            input is transliterated before translation.

    Returns:
        A ``(source_text, translated_text, audio_path)`` tuple.
        ``source_text`` is the text actually sent to the model (after
        transcription and transliteration). ``audio_path`` is only produced
        for "Kurukh -> Hindi". Empty input yields ``("", "", None)``. On
        failure the source text is preserved and the error message is
        returned in the translation slot.
    """
    transcribed = speech_to_text(audio_input) if audio_input else ""
    # Fall back to the typed text when transcription is empty, and strip so
    # whitespace-only input is not sent to the model.
    original_text = (transcribed or text or "").strip()
    if not original_text:
        return "", "", None

    if direction == "Hindi -> Kurukh" and is_hinglish:
        original_text = transliterate_to_hindi(original_text)

    try:
        # Selected inside the try: if the models failed to load at startup,
        # the pipeline name is missing or None, and that must surface as an
        # error message rather than an unhandled exception.
        target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k
        if target_pipeline is None:
            raise RuntimeError("Translation model is not available.")
        results = target_pipeline(
            original_text,
            max_length=128,
            num_beams=5,
            no_repeat_ngram_size=2,
            repetition_penalty=2.0,
            early_stopping=True,
        )
        translated_text = results[0]["generated_text"]
    except Exception as e:
        # Keep the user's text in the first slot and report the problem in
        # the translation slot; the original returned the error first, which
        # overwrote the input.
        return original_text, f"❌ Error: {e}", None

    audio_output = None
    if direction == "Kurukh -> Hindi":
        audio_output = text_to_speech(translated_text, "hi")
    return original_text, translated_text, audio_output
| # --- CSS --- | |
# Stylesheet markup rendered through ``gr.HTML`` at the top of the page.
# The Poppins import lists weight 700 because ``.header-title`` requests
# ``font-weight: 700``; previously only 300/400/600 were loaded, so the
# requested weight was never available.
universal_css = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
body, button, input, select, textarea, .gradio-container {
font-family: 'Poppins', sans-serif !important;
}
.header-div {
text-align: center;
margin-bottom: 25px;
padding: 20px;
background: linear-gradient(to right, #f8f9fa, #e9ecef);
border-radius: 15px;
}
.header-title {
font-size: 2.2em;
font-weight: 700;
color: #2c3e50;
}
.header-subtitle {
font-size: 1.1em;
color: #576574;
}
</style>
"""
| # --- UI --- | |
# Build the Gradio page: shared stylesheet and header, a two-tab body
# (translator and feedback form), then a footer.
# NOTE(review): the `with` nesting below was reconstructed from a flattened
# copy of this file -- confirm it matches the intended layout.
with gr.Blocks(title="Kurukh AI Translator") as demo:
    # Stylesheet first, then the header that uses its classes.
    gr.HTML(universal_css)
    gr.HTML("""
<div class="header-div">
<h1 class="header-title">🇮🇳 AI Kurukh (Oraon) Translator</h1>
<p class="header-subtitle">
Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported
</p>
</div>
""")
    with gr.Tabs():
        # --- Translator Tab ---
        with gr.TabItem("🗣️ Translator"):
            with gr.Accordion("ℹ️ How to use (Click to expand)", open=False):
                gr.Markdown("""
1. Select translation mode.
2. Enable Hinglish if typing Hindi in English letters.
3. Use Voice input if needed.
""")
            with gr.Row():
                # LEFT: translation settings and the text/voice inputs.
                with gr.Column():
                    direction = gr.Radio(
                        ["Kurukh -> Hindi", "Hindi -> Kurukh"],
                        label="Translation Mode",
                        value="Kurukh -> Hindi",
                    )
                    # Only consulted by process_translation when the
                    # direction is "Hindi -> Kurukh".
                    is_hinglish = gr.Checkbox(
                        label="🔤 Hinglish Typing (e.g., 'Tumhara')", value=False
                    )
                    input_text = gr.Textbox(
                        label="Enter Text", placeholder="Type sentences here...", lines=4
                    )
                    # type="filepath": the handler receives a path, which is
                    # what speech_to_text passes to the ASR pipeline.
                    input_audio = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="🎙️ Voice Input (Hindi Only)",
                    )
                    translate_btn = gr.Button("Translate 🚀")
                # RIGHT: read-only translation result and its audio.
                with gr.Column():
                    output_text = gr.Textbox(
                        label="Translation",
                        lines=4,
                        interactive=False
                    )
                    output_audio = gr.Audio(
                        label="🔊 Listen (Hindi Only)", interactive=False
                    )
            # --- EXAMPLES SECTION ---
            gr.Markdown("### 💡 Try these examples:")
            # Each example row supplies values for (input_text, direction,
            # is_hinglish), in that order.
            gr.Examples(
                examples=[
                    # 1. Kurukh (Devanagari Script)
                    ["निघै नामे इन्द्रा हिकै?", "Kurukh -> Hindi", False],
                    # 2. Hindi (Devanagari Script)
                    ["तुम कहाँ जा रहे हो?", "Hindi -> Kurukh", False],
                    # 3. Hinglish (Roman Script -> needs Transliteration)
                    ["Tum kahan ho?", "Hindi -> Kurukh", True],
                ],
                inputs=[input_text, direction, is_hinglish],
                label="Click on an example to load it:"
            )
            # The handler's first return value is written back into
            # input_text, so the box shows the text that was actually
            # processed (e.g. after transcription or transliteration).
            translate_btn.click(
                fn=process_translation,
                inputs=[input_text, input_audio, direction, is_hinglish],
                outputs=[input_text, output_text, output_audio],
            )
        # --- Feedback Tab ---
        with gr.TabItem("📝 Improve the AI"):
            gr.Markdown("### 🛠️ Help us improve accuracy")
            fb_direction = gr.Radio(
                ["Kurukh -> Hindi", "Hindi -> Kurukh"],
                label="Direction",
                value="Kurukh -> Hindi",
            )
            fb_original = gr.Textbox(label="Original Text")
            fb_ai_output = gr.Textbox(label="AI's Translation")
            fb_user_correct = gr.Textbox(
                label="Correct Translation", lines=2
            )
            submit_btn = gr.Button("Submit Correction")
            # Shows the status string returned by save_to_sheet.
            status_lbl = gr.Label(label="Status")
            # Input order matches save_to_sheet's parameters:
            # (original, translation, correction, direction).
            submit_btn.click(
                fn=save_to_sheet,
                inputs=[fb_original, fb_ai_output, fb_user_correct, fb_direction],
                outputs=status_lbl,
            )
    # Footer: horizontal rule and credit line.
    gr.Markdown("---")
    gr.HTML(
        "<center style='color:#777;'>Built with ❤️ for the Kurukh Community</center>"
    )
# Start the app as soon as this script runs.
demo.launch()