# app.py - uploaded by roymukund (commit c67a7fb, verified), 3.86 kB
import gradio as gr
from predict import predict
# Language name shown in the dropdown -> path of the model file used for it.
# Insertion order is the order in which the names are listed.
LANGUAGE_MODELS = dict(
    Odia="models/odia-pos-16K.pkl",
    Punjabi="models/punjabi-pos.pkl",
    Dogri="models/dogri-pos.pkl",
)
def highlight_ssf(text):
    """Wrap SSF structure and tag columns in colored HTML spans.

    The patterns match the *escaped* forms ``&lt;`` and ``&gt;``, so ``text``
    must already have its angle brackets HTML-escaped by the caller.

    Args:
        text: SSF text with ``<`` / ``>`` escaped as ``&lt;`` / ``&gt;``.

    Returns:
        The input with highlighting spans inserted, wrapped in a monospace
        ``<pre>`` element.
    """
    import re

    structure_span = r"<span style='color:green; font-style:italic;'>\1</span>"

    # Sentence open/close tags and the (( / )) chunk brackets.
    text = re.sub(r"(&lt;/?Sentence[^&]*&gt;)", structure_span, text)
    text = re.sub(r"(\(\(|\)\))", structure_span, text)

    # Feature structures: <fs ...>
    text = re.sub(
        r"(&lt;fs[^&]*&gt;)",
        r"<span style='color:darkorange;'>\1</span>",
        text,
    )

    # Third tab-separated column of address-led lines -> blue & bold.
    # Every field excludes "\n" so a short (two-column) line can never borrow
    # fields from the line after it, and the address may be dotted ("1.1").
    # The spans inserted above contain no tabs or newlines, so they still
    # count as part of a single column here.
    text = re.sub(
        r"^([^\S\n]*\d+(?:\.\d+)*\t[^\t\n]+\t)([^\t\n]+)(.*)$",
        r"\1<span style='color:blue; font-weight:bold;'>\2</span>\3",
        text,
        flags=re.MULTILINE,
    )

    return f"<pre style='font-family:monospace;'>{text}</pre>"
def process_file(language, file_obj, file_type):
    """Tag an uploaded file and build an HTML preview of the result.

    Args:
        language: Display name of the language; must be a key of
            ``LANGUAGE_MODELS``.
        file_obj: The uploaded input. Either an object exposing its on-disk
            path as ``.name`` or the path itself.
        file_type: Input format, passed through to ``predict``. The value
            ``"ssf"`` additionally enables highlighting of the preview.

    Returns:
        A ``(result_file, preview)`` tuple: the path returned by ``predict``
        and an HTML snippet showing up to the first 2000 characters of it.

    Raises:
        ValueError: If no model is registered for ``language`` or no input
            file was supplied.
    """
    import html

    model_path = LANGUAGE_MODELS.get(language)
    if not model_path:
        raise ValueError(f"No model available for {language}")
    if file_obj is None:
        # Without this guard the attribute access below fails with an
        # unhelpful AttributeError when nothing was uploaded.
        raise ValueError("No input file was uploaded")

    # NOTE(review): presumably the upload widget hands over either a
    # tempfile-like object or a plain path depending on the Gradio version;
    # accept both - confirm against the installed version.
    input_path = getattr(file_obj, "name", file_obj)
    output_path = f"result_{language}.txt"
    result_file = predict(input_path, model_path, file_type, output_path)

    with open(result_file, "r", encoding="utf-8") as f:
        preview_raw = f.read(2000)  # first ~2000 chars for preview

    # The preview is rendered as HTML, so file content must be escaped in
    # every branch. Quotes are left alone: they are harmless inside <pre>
    # and escaping them would put "&" inside SSF attribute lists, which the
    # highlighter's tag patterns do not span.
    escaped = html.escape(preview_raw, quote=False)
    if file_type == "ssf":
        preview = highlight_ssf(escaped)
    else:
        preview = f"<pre>{escaped}</pre>"
    return result_file, preview
def main():
    """Build the Gradio interface for the POS tagger and launch it.

    The page is one row with two columns: the first holds the inputs
    (language, upload, file type, run button), the second the outputs
    (downloadable result file and an HTML preview). Clicking the button
    calls ``process_file`` with the three input values.
    """
    # NOTE(review): no component created below is given the "download-box"
    # class, so those rules look unused here - confirm before removing.
    # ".svelte-1ipelgc" is a generated class name and is presumably tied to
    # one particular Gradio build - verify after upgrades.
    custom_css = """
    .download-box {
        background: linear-gradient(90deg, #00c6ff, #0072ff);
        padding: 20px;
        border-radius: 12px;
        text-align: center;
        color: white;
        font-weight: bold;
        font-size: 18px;
        box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
    }
    .download-box .wrap.svelte-1ipelgc {
        justify-content: center !important;
    }
    .block-label {
        color: black !important;
        font-size: 18px !important;
        font-weight: 600 !important;
    }
    """

    with gr.Blocks(css=custom_css) as demo:
        gr.HTML(
            """
            <h1>🌍 Indic POS Tagger</h1>
            <p>Upload text or CoNLL files and get POS-tagged output</p>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                language = gr.Dropdown(
                    # Derived from the model table so the choices cannot
                    # drift from the languages that actually have a model.
                    list(LANGUAGE_MODELS),
                    label="🌐 Select Language",
                    value="Odia",
                )
                file_in = gr.File(
                    label="📂 Upload Input File",
                    # ".ssf" is accepted because "ssf" is an offered file
                    # type and the output widget lists it as well.
                    file_types=[".txt", ".conll", ".ssf"],
                )
                file_type = gr.Radio(
                    ["plain", "conll", "ssf"],
                    label="📄 File Type",
                    value="plain",
                )
                submit = gr.Button("🚀 Run POS Tagger", variant="primary")

            with gr.Column(scale=1):
                output_file = gr.File(
                    label="⬇️ Download Tagged File",
                    file_types=[".txt", ".conll", ".ssf"],
                )
                preview_text = gr.HTML(label="👀 Preview (first lines)")

        submit.click(
            process_file,
            inputs=[language, file_in, file_type],
            outputs=[output_file, preview_text],
        )

    demo.launch()
# Build and launch the UI only when this file is run as a script, so that
# importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()