Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from predict import predict | |
# Map each dropdown option (language name) -> path of its pickled POS model.
# Insertion order is the order in which the languages are listed.
LANGUAGE_MODELS = {
    "Odia": "models/odia-pos-16K.pkl",
    "Punjabi": "models/punjabi-pos.pkl",
    "Dogri": "models/dogri-pos.pkl",
}
def highlight_ssf(text):
    """Render raw SSF text as an HTML ``<pre>`` block with simple colouring.

    The input is HTML-escaped first, so SSF markup such as
    ``<Sentence id='1'>`` or ``<fs af='...'>`` is displayed literally instead
    of being parsed by the browser as HTML elements.  Highlighting is then
    applied to the escaped text:

    * ``<Sentence ...>`` / ``</Sentence>`` tags and ``((`` / ``))`` brackets
      are shown in green italics;
    * ``<fs ...>`` spans are shown in dark orange;
    * the third tab-separated column of lines that start with a number
      (the POS tag) is shown in bold blue.

    Args:
        text: Raw (not yet HTML-escaped) SSF text.

    Returns:
        An HTML string wrapped in a monospace ``<pre>`` element.
    """
    import html
    import re

    green = r"<span style='color:green; font-style:italic;'>\1</span>"
    orange = r"<span style='color:darkorange;'>\1</span>"

    # Escape before adding markup so only our own <span> tags are real HTML.
    # quote=False keeps the quotes in attribute values readable.
    text = html.escape(text, quote=False)

    # Non-greedy and newline-free ('.' does not match '\n'), so each match
    # stays within a single tag instead of running to the last '>' in the text.
    text = re.sub(r"(&lt;/?Sentence.*?&gt;)", green, text)
    text = re.sub(r"(\(\(|\)\))", green, text)
    text = re.sub(r"(&lt;fs.*?&gt;)", orange, text)

    def repl_pos(match):
        return (
            f"{match.group(1)}"
            f"<span style='color:blue; font-weight:bold;'>{match.group(2)}</span>"
            f"{match.group(3)}"
        )

    # Columns exclude '\n' as well as '\t'; otherwise, on a three-column line,
    # the tag group would extend into the next line and that line would be
    # skipped.
    text = re.sub(
        r"^([ \t]*\d+\t[^\t\n]+\t)([^\t\n]+)(.*)$",
        repl_pos,
        text,
        flags=re.MULTILINE,
    )
    return f"<pre style='font-family:monospace;'>{text}</pre>"
def process_file(language, file_obj, file_type):
    """Run the POS tagger on an uploaded file and build an HTML preview.

    Args:
        language: Key into ``LANGUAGE_MODELS`` selecting the model file.
        file_obj: The uploaded file: either an object exposing its on-disk
            path as ``.name`` or the path itself.
        file_type: Input format, passed through to ``predict``.  The value
            ``"ssf"`` additionally routes the preview through
            ``highlight_ssf``.

    Returns:
        A ``(result_file, preview)`` tuple: the value returned by ``predict``
        (opened here as a UTF-8 text file path) and an HTML preview of its
        first ~2000 characters.

    Raises:
        ValueError: If no model is registered for ``language`` or no input
            file was supplied.
    """
    import html

    model_path = LANGUAGE_MODELS.get(language)
    if not model_path:
        raise ValueError(f"No model available for {language}")
    if file_obj is None:
        # Fail with a clear message rather than an AttributeError on `.name`.
        raise ValueError("No input file uploaded")

    # Accept both file-like upload objects and plain path strings.
    input_path = getattr(file_obj, "name", file_obj)
    output_path = f"result_{language}.txt"
    result_file = predict(input_path, model_path, file_type, output_path)

    with open(result_file, "r", encoding="utf-8") as f:
        preview_raw = f.read(2000)  # first ~2000 chars for preview

    if file_type == "ssf":
        # SSF previews get structural highlighting.
        preview = highlight_ssf(preview_raw)
    else:
        # Escape so '<', '>' and '&' in the tagged text are shown literally
        # instead of being interpreted as HTML by the preview component.
        preview = f"<pre>{html.escape(preview_raw, quote=False)}</pre>"
    return result_file, preview
def main():
    """Build the Gradio interface for the POS tagger and launch it.

    The left column collects the language, the input file and its format; the
    right column shows the downloadable result and an HTML preview.  Clicking
    the button calls ``process_file`` with the three inputs.
    """
    # NOTE(review): several label/heading strings below start with glyphs that
    # look mis-encoded (probably emoji) — confirm the file's encoding.
    css = """
    .download-box {
        background: linear-gradient(90deg, #00c6ff, #0072ff);
        padding: 20px;
        border-radius: 12px;
        text-align: center;
        color: white;
        font-weight: bold;
        font-size: 18px;
        box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
    }
    .download-box .wrap.svelte-1ipelgc {
        justify-content: center !important;
    }
    .block-label {
        color: black !important;
        font-size: 18px !important;
        font-weight: 600 !important;
    }
    """
    with gr.Blocks(css=css) as demo:
        gr.HTML(
            """
            <h1>π Indic POS Tagger</h1>
            <p>Upload text or CoNLL files and get POS-tagged output</p>
            """
        )
        with gr.Row():
            with gr.Column(scale=1):
                language = gr.Dropdown(
                    # Derived from the model registry so the two cannot drift.
                    list(LANGUAGE_MODELS),
                    label="π Select Language",
                    value="Odia",
                )
                file_in = gr.File(
                    label="π Upload Input File",
                    # ".ssf" is accepted because "ssf" is a selectable type.
                    file_types=[".txt", ".conll", ".ssf"],
                )
                file_type = gr.Radio(
                    ["plain", "conll", "ssf"],
                    label="π File Type",
                    value="plain",
                )
                submit = gr.Button("π Run POS Tagger", variant="primary")
            with gr.Column(scale=1):
                output_file = gr.File(
                    label="β¬οΈ Download Tagged File",
                    file_types=[".txt", ".conll", ".ssf"],
                )
                preview_text = gr.HTML(label="π Preview (first lines)")
        submit.click(
            process_file,
            inputs=[language, file_in, file_type],
            outputs=[output_file, preview_text],
        )
    demo.launch()


if __name__ == "__main__":
    main()