"""Gradio front end for the Indic POS tagger.

The user picks a language, uploads an input file and chooses its format.
``predict`` (project-local) is then run with the model configured for that
language, and the page offers the tagged file for download together with an
HTML preview of its first characters.
"""

import html
import re

import gradio as gr

from predict import predict

# Map dropdown option → model path
LANGUAGE_MODELS = {
    "Odia": "models/odia-pos-16K.pkl",
    "Punjabi": "models/punjabi-pos.pkl",
    "Dogri": "models/dogri-pos.pkl",
}

# Number of characters of the tagged output shown in the preview pane.
PREVIEW_CHARS = 2000

# All patterns below run on text that has ALREADY been HTML-escaped, which is
# why the SSF tags are matched through their entity form (&lt; ... &gt;).
_SENTENCE_TAG_RE = re.compile(r"(&lt;/?Sentence.*?&gt;)")
_CHUNK_BRACKET_RE = re.compile(r"(\(\(|\)\))")
_FS_TAG_RE = re.compile(r"(&lt;fs.*?&gt;)")
# address <TAB> token <TAB> POS [<TAB> rest]; the address may be nested
# ("1.1") for tokens that sit inside a chunk.
_POS_COLUMN_RE = re.compile(
    r"^(\s*\d+(?:\.\d+)*\t[^\t]+\t)([^\t]+)(.*)$",
    flags=re.MULTILINE,
)

# Replacement templates. They must not contain tabs, "((", "))" or "&lt;fs",
# otherwise a later substitution would match inside markup inserted earlier.
_STRUCTURE_SPAN = r"<span style='color:#8e44ad; font-weight:bold;'>\1</span>"
_FS_SPAN = r"<span style='color:#7f8c8d;'>\1</span>"
_POS_SPAN = r"\1<span style='color:blue; font-weight:bold;'>\2</span>\3"

APP_CSS = """
.download-box {
    background: linear-gradient(90deg, #00c6ff, #0072ff);
    padding: 20px;
    border-radius: 12px;
    text-align: center;
    color: white;
    font-weight: bold;
    font-size: 18px;
    box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
}
.download-box .wrap.svelte-1ipelgc {
    justify-content: center !important;
}
.block-label {
    color: black !important;
    font-size: 18px !important;
    font-weight: 600 !important;
}
"""


def highlight_ssf(text):
    """Add simple HTML highlighting for SSF structure and POS tags.

    Args:
        text: SSF content that has already been HTML-escaped, i.e. ``<`` and
            ``>`` appear as ``&lt;`` and ``&gt;``. Unescaped input is not
            highlighted correctly and would be emitted as raw markup.

    Returns:
        An HTML fragment: the text wrapped in ``<pre>``, with sentence tags
        and chunk brackets, ``<fs ...>`` feature structures, and the POS
        column (third tab-separated field) each wrapped in a styled
        ``<span>``. POS tags are blue and bold.
    """
    # Highlight sentence tags and brackets
    text = _SENTENCE_TAG_RE.sub(_STRUCTURE_SPAN, text)
    text = _CHUNK_BRACKET_RE.sub(_STRUCTURE_SPAN, text)
    # Highlight <fs ...> feature structures
    text = _FS_TAG_RE.sub(_FS_SPAN, text)
    # Highlight POS tags (3rd column) → blue & bold. This runs last; the
    # spans inserted above contain no tabs, so the column split still holds.
    text = _POS_COLUMN_RE.sub(_POS_SPAN, text)
    return f"<pre>{text}</pre>"


def process_file(language, file_obj, file_type):
    """Tag the uploaded file and build an HTML preview of the result.

    Args:
        language: Key into ``LANGUAGE_MODELS`` selecting the model.
        file_obj: Value delivered by the ``gr.File`` input. Either an object
            exposing the temp-file path as ``.name`` or the path string
            itself is accepted (which one arrives depends on the Gradio
            version/configuration — TODO confirm for the deployed version).
        file_type: One of ``"plain"``, ``"conll"`` or ``"ssf"``; passed
            through to ``predict`` and used to pick the preview style.

    Returns:
        A ``(result_file, preview_html)`` tuple: the path returned by
        ``predict`` and an HTML fragment showing its first
        ``PREVIEW_CHARS`` characters.

    Raises:
        ValueError: If no model is configured for ``language`` or no file
            was uploaded.
    """
    model_path = LANGUAGE_MODELS.get(language)
    if not model_path:
        raise ValueError(f"No model available for {language}")
    if file_obj is None:
        # Clicking "Run" without an upload used to die with AttributeError.
        raise ValueError("No input file uploaded")

    input_path = getattr(file_obj, "name", file_obj)
    # NOTE(review): fixed name in the working directory — concurrent requests
    # for the same language overwrite each other's result.
    output_path = f"result_{language}.txt"
    result_file = predict(input_path, model_path, file_type, output_path)

    with open(result_file, "r", encoding="utf-8") as f:
        preview_raw = f.read(PREVIEW_CHARS)  # first ~2000 chars for preview

    # The preview is rendered by gr.HTML, so the file content must be escaped
    # whatever its format; quotes are harmless inside element content.
    escaped = html.escape(preview_raw, quote=False)

    # If SSF, apply highlighting
    if file_type == "ssf":
        preview = highlight_ssf(escaped)
    else:
        preview = f"<pre>{escaped}</pre>"

    return result_file, preview


def main():
    """Build the Gradio interface and launch the app."""
    with gr.Blocks(css=APP_CSS) as demo:
        gr.HTML(
            """
            <h1>🌍 Indic POS Tagger</h1>
            <p>Upload text or CoNLL files and get POS-tagged output</p>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                language = gr.Dropdown(
                    ["Odia", "Punjabi", "Dogri"],
                    label="🌐 Select Language",
                    value="Odia",
                )
                file_in = gr.File(
                    label="📂 Upload Input File",
                    # ".ssf" is included so the "ssf" file type below can
                    # actually be used with a matching upload.
                    file_types=[".txt", ".conll", ".ssf"],
                )
                file_type = gr.Radio(
                    ["plain", "conll", "ssf"],
                    label="📄 File Type",
                    value="plain",
                )
                submit = gr.Button("🚀 Run POS Tagger", variant="primary")

            with gr.Column(scale=1):
                output_file = gr.File(
                    label="⬇️ Download Tagged File",
                    file_types=[".txt", ".conll", ".ssf"],
                )
                preview_text = gr.HTML(label="👀 Preview (first lines)")

        submit.click(
            process_file,
            inputs=[language, file_in, file_type],
            outputs=[output_file, preview_text],
        )

    demo.launch()


if __name__ == "__main__":
    main()