Spaces:

ankitklakra
/

Kurukh-Translator

Running

App Files Files Community

Kurukh-Translator / app.py

ankitklakra

Update app.py

be438a0 verified 3 days ago

raw

history blame contribute delete

9 kB

	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
	import os
	import json
	import gspread
	from oauth2client.service_account import ServiceAccountCredentials
	from datetime import datetime
	from gtts import gTTS
	import tempfile
	import requests

	# --- CONFIGURATION ---
	# Hugging Face Hub repositories holding the two translation models, and the
	# name of the Google Sheet that receives user corrections.
	MODEL_K2H_REPO = "ankitklakra/kurukh-to-hindi"
	MODEL_H2K_REPO = "ankitklakra/hindi-to-kurukh"
	SHEET_NAME = "Kurukh_Feedback_Log"


	# Load both translation directions at start-up. A single tokenizer
	# ("google/mt5-small") is shared by the two pipelines.
	print("Loading Translation Models...")
	try:
	    tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
	    model_k2h = AutoModelForSeq2SeqLM.from_pretrained(MODEL_K2H_REPO)
	    model_h2k = AutoModelForSeq2SeqLM.from_pretrained(MODEL_H2K_REPO)

	    pipe_k2h = pipeline("text2text-generation", model=model_k2h, tokenizer=tokenizer)
	    pipe_h2k = pipeline("text2text-generation", model=model_h2k, tokenizer=tokenizer)
	except Exception as e:
	    print(f"Error loading translation models: {e}")
	    # Bind the names even on failure (same approach as asr_pipeline below) so
	    # code that refers to them sees None instead of raising NameError.
	    pipe_k2h = None
	    pipe_h2k = None

	# Speech recognition is optional: the app keeps running without it.
	print("Loading Voice Model...")
	try:
	    asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
	except Exception as e:
	    print(f"Error loading whisper model: {e}")
	    asr_pipeline = None

	# --- HELPER FUNCTIONS ---
	def transliterate_to_hindi(text):
	    """Transliterate Roman-script text through the Google Input Tools endpoint.

	    The request uses the ``hi-t-i0-und`` input-tool code and asks for a single
	    candidate; the value at ``result[1][0][1][0]`` of the JSON reply is
	    returned.

	    Args:
	        text: The text to transliterate.

	    Returns:
	        The transliterated string, or ``text`` unchanged when it is blank, not
	        a string, or the request / response parsing fails (best effort).
	    """
	    # Nothing to transliterate: skip the network round-trip entirely.
	    if not isinstance(text, str) or not text.strip():
	        return text

	    try:
	        response = requests.get(
	            "https://inputtools.google.com/request",
	            # Let requests percent-encode the query so characters such as
	            # "&", "#" or "+" in the user's text cannot corrupt it.
	            params={"text": text, "itc": "hi-t-i0-und", "num": 1},
	            # Never let a stalled remote service hang the UI callback.
	            timeout=5,
	        )
	        response.raise_for_status()
	        result = response.json()
	        return result[1][0][1][0]
	    except (requests.RequestException, ValueError, LookupError, TypeError) as e:
	        # Network failure, non-JSON body, or an unexpected reply layout:
	        # fall back to the untouched input.
	        print(f"Transliteration failed: {e}")
	        return text

	def save_to_sheet(original, translation, correction, direction):
	    """Append one user correction to the feedback Google Sheet.

	    Args:
	        original: Source text that was translated; must not be blank.
	        translation: The translation produced by the model.
	        correction: The user's corrected translation; must not be blank.
	        direction: Translation direction label stored with the row.

	    Returns:
	        A status message: a validation warning, a credentials warning, the
	        success message, or an error message carrying the exception text.
	    """

	    def _is_blank(value):
	        # Empty, None, or whitespace-only values count as missing.
	        return not value or not value.strip()

	    # --- VALIDATION CHECK ---
	    if _is_blank(original):
	        return "⚠️ Error: Original text is missing."
	    if _is_blank(correction):
	        return "⚠️ Error: Please enter your correction before submitting."

	    header_row = [
	        "Timestamp",
	        "Direction",
	        "Original Text",
	        "AI Translation",
	        "User Correction",
	    ]
	    api_scopes = [
	        "https://spreadsheets.google.com/feeds",
	        "https://www.googleapis.com/auth/drive",
	    ]

	    try:
	        # The service-account key is read as JSON from an environment variable.
	        raw_credentials = os.getenv("GOOGLE_CREDENTIALS")
	        if not raw_credentials:
	            return "⚠️ Error: Credentials missing."

	        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
	            json.loads(raw_credentials), api_scopes
	        )
	        worksheet = gspread.authorize(credentials).open(SHEET_NAME).sheet1

	        # A completely empty sheet gets its header row first.
	        if not worksheet.get_all_values():
	            worksheet.append_row(header_row)

	        timestamp = str(datetime.now())
	        worksheet.append_row(
	            [timestamp, direction, original, translation, correction]
	        )
	    except Exception as error:
	        return f"❌ Error: {str(error)}"

	    return "✅ Saved to Google Sheets."

	def speech_to_text(audio_path):
	    """Transcribe an audio file with the speech-recognition pipeline.

	    Args:
	        audio_path: Path of the recorded audio file, or None.

	    Returns:
	        The ``"text"`` field of the pipeline result, or an empty string when
	        there is no audio or the speech model is unavailable.
	    """
	    # No recording supplied.
	    if audio_path is None:
	        return ""
	    # The speech model failed to load at start-up.
	    if asr_pipeline is None:
	        return ""

	    transcription = asr_pipeline(audio_path)
	    return transcription["text"]

	def text_to_speech(text, language="hi"):
	    """Synthesize ``text`` with gTTS and save it to a temporary MP3 file.

	    Args:
	        text: The text to speak. Falsy values produce no audio.
	        language: Language code passed to gTTS as ``lang``.

	    Returns:
	        The path of the generated ``.mp3`` file, or None when ``text`` is
	        empty or synthesis fails (best effort).
	    """
	    if not text:
	        return None

	    audio_path = None
	    try:
	        tts = gTTS(text=text, lang=language)
	        # Only reserve a unique file name here; the handle is closed right
	        # away so no descriptor leaks and gTTS can reopen the path itself.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
	            audio_path = temp_file.name
	        tts.save(audio_path)
	        return audio_path
	    except Exception as e:
	        # Audio is optional: report the problem and carry on without it.
	        print(f"Text-to-speech failed: {e}")
	        if audio_path is not None:
	            # Do not leave an empty or partial file behind.
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass
	        return None

	# --- MAIN TRANSLATION LOGIC ---
	def process_translation(text, audio_input, direction, is_hinglish):
	    """Translate typed or spoken input and optionally synthesize audio.

	    Args:
	        text: Text typed by the user.
	        audio_input: Path of a recorded audio file; when truthy it takes
	            precedence over ``text`` and is transcribed first.
	        direction: ``"Kurukh -> Hindi"`` or ``"Hindi -> Kurukh"``.
	        is_hinglish: When true and translating Hindi -> Kurukh, the input is
	            transliterated before translation.

	    Returns:
	        A ``(source_text, translated_text, audio_path)`` tuple. With no input
	        it is ``("", "", None)``. If translation fails, the source text is
	        kept and the error message is returned in the translation slot.
	        ``audio_path`` is only produced for Kurukh -> Hindi.
	    """
	    original_text = speech_to_text(audio_input) if audio_input else text
	    if not original_text:
	        return "", "", None

	    if direction == "Hindi -> Kurukh" and is_hinglish:
	        original_text = transliterate_to_hindi(original_text)

	    try:
	        # Selecting the pipeline inside the try means models that failed to
	        # load are reported to the user instead of crashing the callback.
	        target_pipeline = pipe_k2h if direction == "Kurukh -> Hindi" else pipe_h2k
	        if target_pipeline is None:
	            raise RuntimeError("Translation model is not loaded.")

	        results = target_pipeline(
	            original_text,
	            max_length=128,
	            num_beams=5,
	            no_repeat_ngram_size=2,
	            repetition_penalty=2.0,
	            early_stopping=True,
	        )
	        translated_text = results[0]["generated_text"]
	    except Exception as e:
	        # Keep the user's text in the first slot and surface the error where
	        # the translation would appear, rather than overwriting the input.
	        return original_text, f"❌ Error: {e}", None

	    audio_output = None
	    if direction == "Kurukh -> Hindi":
	        audio_output = text_to_speech(translated_text, "hi")

	    return original_text, translated_text, audio_output


	# --- CSS ---
	# Inline <style> block that the UI section passes to gr.HTML. It imports the
	# Poppins web font, applies it to the common form elements and the Gradio
	# container, and styles the page header (.header-div, .header-title,
	# .header-subtitle).
	# NOTE(review): the font import requests weights 300/400/600 only, while
	# .header-title asks for font-weight 700 — confirm whether 700 should be
	# added to the import.
	universal_css = """
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap');
	body, button, input, select, textarea, .gradio-container {
	font-family: 'Poppins', sans-serif !important;
	}
	.header-div {
	text-align: center;
	margin-bottom: 25px;
	padding: 20px;
	background: linear-gradient(to right, #f8f9fa, #e9ecef);
	border-radius: 15px;
	}
	.header-title {
	font-size: 2.2em;
	font-weight: 700;
	color: #2c3e50;
	}
	.header-subtitle {
	font-size: 1.1em;
	color: #576574;
	}
	</style>
	"""


	# --- UI ---
	# Two-tab Gradio app: a translator (text or microphone input) and a feedback
	# form whose submissions are handed to save_to_sheet.
	with gr.Blocks(title="Kurukh AI Translator") as demo:

	    # Inject the shared stylesheet, then the page header.
	    gr.HTML(universal_css)

	    gr.HTML("""
	    <div class="header-div">
	    <h1 class="header-title">🇮🇳 AI Kurukh (Oraon) Translator</h1>
	    <p class="header-subtitle">
	    Bridging Communities with Artificial Intelligence | Voice & Hinglish Supported
	    </p>
	    </div>
	    """)

	    with gr.Tabs():

	        # --- Translator Tab ---
	        with gr.TabItem("🗣️ Translator"):

	            with gr.Accordion("ℹ️ How to use (Click to expand)", open=False):
	                gr.Markdown("""
	                1. Select translation mode.
	                2. Enable Hinglish if typing Hindi in English letters.
	                3. Use Voice input if needed.
	                """)

	            with gr.Row():

	                # LEFT: translation settings and inputs
	                with gr.Column():
	                    direction = gr.Radio(
	                        ["Kurukh -> Hindi", "Hindi -> Kurukh"],
	                        label="Translation Mode",
	                        value="Kurukh -> Hindi",
	                    )

	                    is_hinglish = gr.Checkbox(
	                        label="🔤 Hinglish Typing (e.g., 'Tumhara')", value=False
	                    )

	                    input_text = gr.Textbox(
	                        label="Enter Text", placeholder="Type sentences here...", lines=4
	                    )

	                    input_audio = gr.Audio(
	                        sources=["microphone"],
	                        type="filepath",
	                        label="🎙️ Voice Input (Hindi Only)",
	                    )

	                    translate_btn = gr.Button("Translate 🚀")

	                # RIGHT: read-only results
	                with gr.Column():
	                    output_text = gr.Textbox(
	                        label="Translation",
	                        lines=4,
	                        interactive=False,
	                    )
	                    output_audio = gr.Audio(
	                        label="🔊 Listen (Hindi Only)", interactive=False
	                    )

	            # --- EXAMPLES SECTION ---
	            gr.Markdown("### 💡 Try these examples:")
	            gr.Examples(
	                examples=[
	                    # 1. Kurukh (Devanagari Script)
	                    ["निघै नामे इन्द्रा हिकै?", "Kurukh -> Hindi", False],
	                    # 2. Hindi (Devanagari Script)
	                    ["तुम कहाँ जा रहे हो?", "Hindi -> Kurukh", False],
	                    # 3. Hinglish (Roman Script -> needs Transliteration)
	                    ["Tum kahan ho?", "Hindi -> Kurukh", True],
	                ],
	                inputs=[input_text, direction, is_hinglish],
	                label="Click on an example to load it:",
	            )

	            # input_text is also an output: the first value returned by
	            # process_translation is written back into the input box.
	            translate_btn.click(
	                fn=process_translation,
	                inputs=[input_text, input_audio, direction, is_hinglish],
	                outputs=[input_text, output_text, output_audio],
	            )

	        # --- Feedback Tab ---
	        with gr.TabItem("📝 Improve the AI"):
	            gr.Markdown("### 🛠️ Help us improve accuracy")

	            fb_direction = gr.Radio(
	                ["Kurukh -> Hindi", "Hindi -> Kurukh"],
	                label="Direction",
	                value="Kurukh -> Hindi",
	            )

	            fb_original = gr.Textbox(label="Original Text")
	            fb_ai_output = gr.Textbox(label="AI's Translation")
	            fb_user_correct = gr.Textbox(
	                label="Correct Translation", lines=2
	            )

	            submit_btn = gr.Button("Submit Correction")
	            status_lbl = gr.Label(label="Status")

	            # The message returned by save_to_sheet is shown in the status label.
	            submit_btn.click(
	                fn=save_to_sheet,
	                inputs=[fb_original, fb_ai_output, fb_user_correct, fb_direction],
	                outputs=status_lbl,
	            )

	    # Footer shown below the tabs.
	    gr.Markdown("---")
	    gr.HTML(
	        "<center style='color:#777;'>Built with ❤️ for the Kurukh Community</center>"
	    )

	demo.launch()