import html
import json
import traceback
from typing import Dict, List

import gradio as gr
import pandas as pd

from tasks.pos_tagging import pos_tagging
from utils.ner_helpers import is_llm_model
from utils.pos_helpers import *


# Column layout shared by every table this module produces.
_TABLE_COLUMNS = ["Token", "POS Tag"]

# Background colour used to highlight each POS tag in the tagged view.
_POS_COLORS = {
    "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
    "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
    "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
    "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
    "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
    "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0",
}

# Placeholder shown in both tabs before any text has been tagged.
_PLACEHOLDER_HTML = (
    "<div style='text-align: center; color: #666; padding: 20px;'>"
    "Enter text and click 'Tag Text' to analyze.</div>"
)

# Stylesheet scoped to the tagged-view HTML component.
_POS_OUTPUT_CSS = """
<style>
#pos-output-html .pos-highlight {
    white-space: pre-wrap;
    line-height: 1.8;
    font-size: 14px;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 4px;
    background: #f9f9f9;
}
#pos-output-html .pos-token {
    display: inline-block;
    margin: 0 2px 4px 0;
    vertical-align: top;
    text-align: center;
}
#pos-output-html .token-text {
    display: block;
    padding: 2px 8px;
    background: #f0f4f8;
    border-radius: 4px 4px 0 0;
    border: 1px solid #dbe4ed;
    border-bottom: none;
    font-size: 0.9em;
}
#pos-output-html .pos-tag {
    display: block;
    padding: 2px 8px;
    border-radius: 0 0 4px 4px;
    font-size: 0.8em;
    font-family: 'Courier New', monospace;
    border: 1px solid;
    border-top: none;
}
/* Color coding for common POS tags */
#pos-output-html .NOUN { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
#pos-output-html .VERB { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
#pos-output-html .ADJ { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
#pos-output-html .ADV { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
#pos-output-html .PRON { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
#pos-output-html .DET { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
#pos-output-html .ADP { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
#pos-output-html .CONJ, #pos-output-html .CCONJ, #pos-output-html .SCONJ { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
#pos-output-html .NUM { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
#pos-output-html .PUNCT { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
#pos-output-html .X, #pos-output-html .SYM { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
#pos-output-html .PROPN { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
#pos-output-html .AUX { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
#pos-output-html .PART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
#pos-output-html .INTJ { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
</style>
"""

# Stylesheet for the output tab strip.
_TAB_CSS = """
<style>
/* Style for the tabs */
#tagged-view, #table-view {
    padding: 15px;
}
/* Make the tabs more visible */
.tab-nav {
    margin-bottom: 10px;
    border-bottom: 1px solid #e0e0e0;
}
.tab-nav button {
    padding: 8px 16px;
    margin-right: 5px;
    border: 1px solid #e0e0e0;
    background: #f5f5f5;
    border-radius: 4px 4px 0 0;
    cursor: pointer;
}
.tab-nav button.selected {
    background: #ffffff;
    border-bottom: 2px solid #0e7490;
    font-weight: bold;
}
</style>
"""


def format_pos_result(result, selected_tags=None):
    """Render a tagging result as highlighted HTML plus a token/tag table.

    Args:
        result: Mapping with parallel "tokens" and "tags" sequences. Anything
            falsy, or missing either key, is treated as an invalid result.
        selected_tags: Tags to highlight and list in the table. ``None`` means
            every key of ``POS_TAG_DESCRIPTIONS``; an empty list highlights
            nothing.

    Returns:
        A ``(html, dataframe)`` tuple. The dataframe always has the columns
        "Token" and "POS Tag", even when it has no rows.
    """
    if not result or "tokens" not in result or "tags" not in result:
        return (
            "<div style='text-align: center; color: #666; padding: 20px;'>"
            "No POS tags found or invalid result format.</div>",
            pd.DataFrame(columns=_TABLE_COLUMNS),
        )

    if selected_tags is None:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
    visible_tags = set(selected_tags)

    html_parts = [
        '<div style="line-height:1.6;padding:15px;border:1px solid #e0e0e0;'
        'border-radius:4px;background:#f9f9f9;white-space:pre-wrap;">'
    ]
    rows = []
    for word, tag in zip(result["tokens"], result["tags"]):
        # Strip scheme suffixes such as "NOUN-x" / "NOUN_x"; anything that is
        # still not a standard tag is reported as "X".
        clean_tag = tag.split("-")[0].split("_")[0].upper()
        if clean_tag not in STANDARD_POS_TAGS:
            clean_tag = "X"

        escaped_word = html.escape(str(word))
        if clean_tag not in visible_tags:
            # Deselected tags stay in the running text but are not highlighted
            # and do not appear in the table.
            html_parts.append(f"{escaped_word} ")
            continue

        rows.append({"Token": word, "POS Tag": clean_tag})
        color = _POS_COLORS.get(clean_tag, "#f0f0f0")
        html_parts.append(
            f'<span style="background:{color};border-radius:3px;padding:0 2px;'
            f'margin:0 1px;border:1px solid rgba(0,0,0,0.1);">'
            f"{escaped_word} "
            f'<span style="font-size:0.7em;font-weight:bold;color:#555;'
            f'border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);">'
            f"{clean_tag}</span>"
            "</span>"
        )
    html_parts.append("</div>")

    # Explicit columns keep the frame well-formed when no row was selected.
    return "".join(html_parts), pd.DataFrame(rows, columns=_TABLE_COLUMNS)


def pos_ui():
    """Build the part-of-speech tagging panel and wire its event handlers.

    Creates the input column (text box, examples, tag filter, model picker,
    optional custom instructions) and the output column (tagged HTML view and
    table view), then connects the buttons and the model dropdown.

    No ``gr.Blocks`` is created here, so this is presumably meant to be called
    inside an enclosing Blocks/Tab context -- confirm against the caller.

    Returns:
        None. The components are registered as a side effect.
    """
    all_tags = list(POS_TAG_DESCRIPTIONS.keys())

    def select_all_tags():
        """Tick every available POS tag."""
        return gr.CheckboxGroup(value=all_tags)

    def clear_all_tags():
        """Untick every POS tag."""
        return gr.CheckboxGroup(value=[])

    def update_ui(model_name: str):
        """Show the custom-instructions box only for LLM-backed models."""
        return gr.Textbox(visible=is_llm_model(model_name))

    def process_pos(text: str, model: str, custom_instructions: str, selected_tags: List[str]):
        """Tag ``text`` and stream UI updates for the four output components.

        Every yielded list is ordered as
        ``[output_html, no_results_html, output_table, no_results_table]``.
        """

        def message_only(markup):
            # Show a single message in the tagged view and hide everything else.
            return [
                gr.HTML(markup, visible=True),
                gr.HTML(visible=False),
                gr.Dataframe(visible=False),
                gr.HTML(visible=False),
            ]

        if not text or not text.strip():
            # This function is a generator, so the message has to be yielded;
            # a returned value would never reach the UI.
            yield message_only(
                "<div style='color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>"
            )
            return

        use_llm = is_llm_model(model)
        if not selected_tags:
            selected_tags = all_tags

        try:
            yield message_only(
                "<div class='pos-highlight'>Processing... This may take a moment for large texts.</div>"
            )
            result = pos_tagging(
                text=text,
                model=model,
                custom_instructions=custom_instructions if use_llm else "",
                use_llm=use_llm,
            )
            if "error" in result:
                error_msg = str(result["error"])
                if "API key" in error_msg or "authentication" in error_msg.lower():
                    error_msg += " Please check your API key configuration."
                # The message comes from the backend, so escape it before
                # embedding it in markup.
                yield message_only(
                    f"<div style='color: #d32f2f; padding: 20px;'>{html.escape(error_msg)}</div>"
                )
                return

            tagged_markup, table = format_pos_result(result, selected_tags)
            if not table.empty:
                yield [
                    gr.HTML(tagged_markup, visible=True),
                    gr.HTML(visible=False),
                    gr.Dataframe(value=table, visible=True),
                    gr.HTML(visible=False),
                ]
            else:
                empty_msg = "<div class='pos-highlight' style='text-align: center; color: #666; padding: 20px;'>No POS tags could be extracted from the text.</div>"
                yield [
                    gr.HTML(empty_msg, visible=True),
                    gr.HTML(visible=False),
                    gr.Dataframe(visible=False),
                    gr.HTML(empty_msg, visible=True),
                ]
        except Exception as e:
            # UI boundary: log the details, show a generic message to the user.
            print(f"Error processing request: {str(e)}\n\n{traceback.format_exc()}")
            yield message_only(
                "<div class='pos-highlight' style='color: #d32f2f; padding: 20px;'>An error occurred while processing your request. Please try again.</div>"
            )

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                lines=8,
                placeholder="Enter text to analyze for part-of-speech tags...",
                elem_id="pos-input-text",
            )
            gr.Examples(
                examples=[
                    ["The cat is sitting on the mat."],
                    ["She quickly finished her homework before dinner."],
                ],
                inputs=[input_text],
                label="Examples",
            )

            with gr.Group():
                tag_selection = gr.CheckboxGroup(
                    label="POS Tags to Display",
                    choices=all_tags,
                    value=DEFAULT_SELECTED_TAGS,
                    interactive=True,
                )
                with gr.Row():
                    select_all_btn = gr.Button("Select All", size="sm")
                    clear_all_btn = gr.Button("Clear All", size="sm")

            with gr.Row():
                model_dropdown = gr.Dropdown(
                    POS_MODELS,
                    value=DEFAULT_MODEL,
                    label="Model",
                    interactive=True,
                    elem_id="pos-model-dropdown",
                )
                custom_instructions = gr.Textbox(
                    label="Custom Instructions (optional)",
                    lines=2,
                    placeholder="Add any custom instructions for the model...",
                    elem_id="pos-custom-instructions",
                    # Only LLM-backed models accept custom instructions.
                    visible=is_llm_model(DEFAULT_MODEL),
                )

            submit_btn = gr.Button("Tag Text", variant="primary", elem_id="pos-submit-btn")

            select_all_btn.click(fn=select_all_tags, outputs=[tag_selection])
            clear_all_btn.click(fn=clear_all_tags, outputs=[tag_selection])

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Tagged View", id="tagged-view"):
                    no_results_html = gr.HTML(_PLACEHOLDER_HTML, visible=True)
                    output_html = gr.HTML(
                        label="POS Tags",
                        elem_id="pos-output-html",
                        visible=False,
                    )
                with gr.Tab("Table View", id="table-view"):
                    no_results_table = gr.HTML(_PLACEHOLDER_HTML, visible=True)
                    output_table = gr.Dataframe(
                        label="POS Tags",
                        headers=["Token", "POS Tag"],
                        datatype=["str", "str"],
                        interactive=False,
                        wrap=True,
                        elem_id="pos-output-table",
                        visible=False,
                    )

            gr.HTML(_POS_OUTPUT_CSS)

    model_dropdown.change(
        fn=update_ui,
        inputs=[model_dropdown],
        outputs=[custom_instructions],
    )
    submit_btn.click(
        fn=process_pos,
        inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
        outputs=[output_html, no_results_html, output_table, no_results_table],
        show_progress=True,
    )
    gr.HTML(_TAB_CSS)
    return None