import gradio as gr from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM import os import json import gspread from oauth2client.service_account import ServiceAccountCredentials from datetime import datetime from gtts import gTTS import tempfile import requests # --- CONFIGURATION --- MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi" MODEL_H2K_REPO = "ankitklakra/hindi-to-kurukh" SHEET_NAME = "Kurukh_Feedback_Log" print("Loading Translation Models...") try: tokenizer = AutoTokenizer.from_pretrained("google/mt5-small") model_k2h = AutoModelForSeq2SeqLM.from_pretrained(MODEL_K2H_REPO) model_h2k = AutoModelForSeq2SeqLM.from_pretrained(MODEL_H2K_REPO) pipe_k2h = pipeline("text2text-generation", model=model_k2h, tokenizer=tokenizer) pipe_h2k = pipeline("text2text-generation", model=model_h2k, tokenizer=tokenizer) except Exception as e: print(f"Error loading translation models: {e}") print("Loading Voice Model...") try: asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny") except Exception as e: print(f"Error loading whisper model: {e}") asr_pipeline = None # --- HELPER FUNCTIONS --- def transliterate_to_hindi(text): try: url = "https://inputtools.google.com/request?text={}&itc=hi-t-i0-und&num=1" response = requests.get(url.format(text)) result = response.json() return result[1][0][1][0] except: return text def save_to_sheet(original, translation, correction, direction): # --- VALIDATION CHECK --- if not original or not original.strip(): return "⚠️ Error: Original text is missing." if not correction or not correction.strip(): return "⚠️ Error: Please enter your correction before submitting." try: json_creds = os.getenv("GOOGLE_CREDENTIALS") if not json_creds: return "⚠️ Error: Credentials missing." creds_dict = json.loads(json_creds) scope = [ "https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive", ] creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope) client = gspread.authorize(creds) sheet = client.open(SHEET_NAME).sheet1 if not sheet.get_all_values(): sheet.append_row( [ "Timestamp", "Direction", "Original Text", "AI Translation", "User Correction", ] ) sheet.append_row( [str(datetime.now()), direction, original, translation, correction] ) return "✅ Saved to Google Sheets." except Exception as e: return f"❌ Error: {str(e)}" def speech_to_text(audio_path): if audio_path is None or asr_pipeline is None: return "" return asr_pipeline(audio_path)["text"] def text_to_speech(text, language="hi"): if not text: return None try: tts = gTTS(text=text, lang=language) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") tts.save(temp_file.name) return temp_file.name except: return None # --- MAIN TRANSLATION LOGIC --- def process_translation(text, audio_input, direction, is_hinglish): original_text = speech_to_text(audio_input) if audio_input else text if not original_text: return "", "", None if direction == "Hindi -> Kurukh" and is_hinglish: original_text = transliterate_to_hindi(original_text) target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k try: results = target_pipeline( original_text, max_length=128, num_beams=5, no_repeat_ngram_size=2, repetition_penalty=2.0, early_stopping=True, ) translated_text = results[0]["generated_text"] except Exception as e: return str(e), "", None audio_output = None if direction == "Kurukh -> Hindi": audio_output = text_to_speech(translated_text, "hi") return original_text, translated_text, audio_output # --- CSS --- universal_css = """ """ # --- UI --- with gr.Blocks(title="Kurukh AI Translator") as demo: gr.HTML(universal_css) gr.HTML("""
Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported