File size: 4,010 Bytes
c7a6fe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import html

import gradio as gr
import torch
from transformers import AutoModel

# 1. Load the model once at import time so every request reuses the same
#    instance (requires the `transformers` and `torch` packages).
# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run on this machine -- confirm the repository is trusted.
MODEL_REPO_ID = "zilliz/semantic-highlight-bilingual-v1"

print("Loading model... This may take a moment.")
model = AutoModel.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)

# Inline CSS applied to every highlighted sentence (bright yellow marker).
_HIGHLIGHT_STYLE = (
    "background-color: #fff176; color: #000; padding: 2px; "
    "border-radius: 3px; font-weight: 500;"
)


def _locate_highlight_spans(context, sentences):
    """Return sorted, non-overlapping (start, end) spans of sentences in context."""
    spans = []
    for sentence in sentences:
        if not sentence:
            # An empty string "matches" at every position; skip it.
            continue
        start = context.find(sentence)
        while start != -1:
            end = start + len(sentence)
            spans.append((start, end))
            start = context.find(sentence, end)

    spans.sort()
    merged = []
    for start, end in spans:
        if merged and start < merged[-1][1]:
            # Overlapping (or nested) match: extend the previous span instead
            # of emitting nested/duplicated highlight tags.
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))
    return merged


def _render_highlighted_html(context, spans):
    """Build HTML for context, escaping all text and wrapping spans in highlights."""
    pieces = []
    cursor = 0
    for start, end in spans:
        pieces.append(html.escape(context[cursor:start]))
        escaped_sentence = html.escape(context[start:end])
        pieces.append(f'<span style="{_HIGHLIGHT_STYLE}">{escaped_sentence}</span>')
        cursor = end
    pieces.append(html.escape(context[cursor:]))
    return "".join(pieces)


def process_and_highlight(question, context, threshold):
    """Highlight the sentences of ``context`` that the model selects for ``question``.

    Args:
        question: The user's question text.
        context: The document text to search for relevant sentences.
        threshold: Value forwarded to ``model.process`` as ``threshold``.

    Returns:
        A ``(html_output, metrics_text)`` pair. Two values are always
        returned, including on the validation path, because the click
        handler is wired to two output components.
    """
    # Treat missing and whitespace-only inputs alike.
    if not (question and question.strip()) or not (context and context.strip()):
        return "Please provide both a question and context.", ""

    # 2. Run the model inference
    # NOTE(review): `result` is used as a dict-like object below; its exact
    # schema comes from the model's remote code -- confirm against the model card.
    result = model.process(
        question=question,
        context=context,
        threshold=threshold,
        return_sentence_metrics=True
    )

    highlighted_sentences = result.get("highlighted_sentences") or []

    # 3. Create the highlighted HTML output
    # Match positions are located in the raw context first and the HTML is
    # assembled afterwards, so (a) user text is always escaped and (b) a
    # sentence can never match inside markup inserted for another sentence.
    spans = _locate_highlight_spans(context, highlighted_sentences)
    output_html = _render_highlighted_html(context, spans)

    # Wrap in a container for better typography
    final_output = f"""
    <div style="font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; line-height: 1.8; font-size: 16px; color: #333;">
        {output_html}
    </div>
    """

    # 4. Format metrics for the display
    metrics_str = "No specific probabilities returned."
    if "sentence_probabilities" in result:
        metrics_str = "\n".join(
            f"• {p:.4f}" for p in result["sentence_probabilities"]
        )

    return final_output, metrics_str

# 5. Build the Gradio UI
# Layout: a two-column row (inputs on the left, results on the right),
# followed by a clickable example and the button wiring.
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic Highlighter") as demo:
    # Page header.
    gr.Markdown("# 🔍 Semantic Highlight Explorer")
    gr.Markdown("Identify and highlight parts of a text that answer a specific question using the Zilliz bilingual model.")

    with gr.Row():
        # Left column: everything the user fills in.
        with gr.Column(scale=1):
            question_input = gr.Textbox(
                lines=2,
                label="Question",
                placeholder="e.g., What are the symptoms of dehydration?",
            )
            context_input = gr.Textbox(
                lines=10,
                label="Context / Full Text",
                placeholder="Paste the document text here...",
            )
            threshold_slider = gr.Slider(
                label="Confidence Threshold",
                minimum=0.1,
                maximum=1.0,
                step=0.05,
                value=0.5,
            )
            submit_btn = gr.Button("Analyze & Highlight", variant="primary")

        # Right column: rendered highlights plus collapsible per-sentence metrics.
        with gr.Column(scale=1):
            # NOTE(review): gr.Label is used here purely as a heading.
            gr.Label("Highlighted Result")
            output_display = gr.HTML()

            with gr.Accordion("Sentence Metrics", open=False):
                metrics_display = gr.Textbox(label="Probabilities", lines=5)

    # The same three components feed both the example loader and the button.
    analysis_inputs = [question_input, context_input, threshold_slider]

    # One ready-made example: question, context, threshold.
    dehydration_example = [
        "What are the symptoms of dehydration?",
        "Dehydration occurs when your body loses more fluid than you take in. Common signs include feeling thirsty and having a dry mouth. The human body is composed of about 60% water. Dark yellow urine and infrequent urination are warning signs. Water is essential for many bodily functions. Dizziness, fatigue, and headaches can indicate severe dehydration.",
        0.5,
    ]
    gr.Examples(examples=[dehydration_example], inputs=analysis_inputs)

    # Clicking the button runs the highlighter and fills both result widgets.
    submit_btn.click(
        fn=process_and_highlight,
        inputs=analysis_inputs,
        outputs=[output_display, metrics_display],
    )

# Start the web app only when this file is executed directly, not on import.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link in addition
    # to the local server -- confirm that is intended for this deployment.
    demo.launch(share=True)