"""Gradio demo: highlight the sentences of a context that answer a question.

The script loads the ``zilliz/semantic-highlight-bilingual-v1`` model once at
import time, exposes ``process_and_highlight`` as the click handler, and
builds a two-column Gradio UI around it.
"""

import html
import re

import gradio as gr
from transformers import AutoModel
import torch

# Inline CSS for highlighted sentences (bright yellow background).
HIGHLIGHT_STYLE = (
    "background-color: #fff176; color: #000; padding: 2px; "
    "border-radius: 3px; font-weight: 500;"
)

# Container CSS; ``pre-wrap`` keeps the line breaks of the pasted context,
# which HTML would otherwise collapse into single spaces.
CONTAINER_STYLE = "line-height: 1.6; white-space: pre-wrap;"

# 1. Load the model (ensure you have transformers and torch installed)
print("Loading model... This may take a moment.")
# NOTE(security): trust_remote_code=True executes Python code shipped with the
# model repository. Only keep it for repositories you trust.
model = AutoModel.from_pretrained(
    "zilliz/semantic-highlight-bilingual-v1",
    trust_remote_code=True,
)


def _render_highlights(context, sentences):
    """Return ``context`` as HTML-escaped text with ``sentences`` marked.

    Both the context and the sentences are escaped before matching, so user
    text can never inject markup into the page. All sentences are matched in
    a single regex pass (longest alternative first), which means text inside
    an already inserted ``<mark>`` tag is never matched a second time.
    """
    escaped_context = html.escape(context)

    # Drop non-strings and blank entries (an empty pattern would match between
    # every character), de-duplicate while keeping order, then sort longest
    # first so a sentence wins over any shorter sentence it contains.
    unique_sentences = dict.fromkeys(
        html.escape(s) for s in sentences if isinstance(s, str) and s.strip()
    )
    candidates = sorted(unique_sentences, key=len, reverse=True)
    if not candidates:
        return escaped_context

    pattern = re.compile("|".join(re.escape(c) for c in candidates))
    # A function replacement avoids backslash/group-reference processing of
    # the matched text.
    return pattern.sub(
        lambda m: f'<mark style="{HIGHLIGHT_STYLE}">{m.group(0)}</mark>',
        escaped_context,
    )


def process_and_highlight(question, context, threshold):
    """Run the model and build the highlighted HTML plus a metrics string.

    Args:
        question: The question typed by the user.
        context: The full text in which answering sentences are highlighted.
        threshold: Value forwarded to ``model.process`` as ``threshold``.

    Returns:
        A ``(html, metrics)`` tuple matching the two Gradio outputs wired to
        this handler. When either input is empty, the first element is a
        prompt asking for both inputs and the second is an empty string.
    """
    if not question or not context:
        # Two outputs are wired to this handler, so always return two values.
        return "Please provide both a question and context.", ""

    # 2. Run the model inference
    # NOTE(review): the result is assumed to be a dict-like object with
    # "highlighted_sentences" and optionally "sentence_probabilities" --
    # confirm against the model card.
    result = model.process(
        question=question,
        context=context,
        threshold=threshold,
        return_sentence_metrics=True,
    )
    highlighted_sentences = result.get("highlighted_sentences") or []

    # 3. Create the highlighted HTML output, wrapped in a container for
    # better typography. Built on one line so that ``pre-wrap`` does not
    # render stray leading/trailing newlines.
    output_html = _render_highlights(context, highlighted_sentences)
    final_output = f'<div style="{CONTAINER_STYLE}">{output_html}</div>'

    # 4. Format metrics for the display
    metrics_str = "No specific probabilities returned."
    if "sentence_probabilities" in result:
        metrics_str = "\n".join(
            f"• {p:.4f}" for p in result["sentence_probabilities"]
        )

    return final_output, metrics_str


# 5. Build the Gradio UI
with gr.Blocks(theme=gr.themes.Soft(), title="Semantic Highlighter") as demo:
    gr.Markdown("# 🔍 Semantic Highlight Explorer")
    gr.Markdown(
        "Identify and highlight parts of a text that answer a specific "
        "question using the Zilliz bilingual model."
    )

    with gr.Row():
        with gr.Column(scale=1):
            question_input = gr.Textbox(
                label="Question",
                placeholder="e.g., What are the symptoms of dehydration?",
                lines=2,
            )
            context_input = gr.Textbox(
                label="Context / Full Text",
                placeholder="Paste the document text here...",
                lines=10,
            )
            threshold_slider = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.5,
                step=0.05,
                label="Confidence Threshold",
            )
            submit_btn = gr.Button("Analyze & Highlight", variant="primary")

        with gr.Column(scale=1):
            gr.Label("Highlighted Result")
            output_display = gr.HTML()
            with gr.Accordion("Sentence Metrics", open=False):
                metrics_display = gr.Textbox(label="Probabilities", lines=5)

    # Add example from your snippet
    gr.Examples(
        examples=[
            [
                "What are the symptoms of dehydration?",
                "Dehydration occurs when your body loses more fluid than you take in. Common signs include feeling thirsty and having a dry mouth. The human body is composed of about 60% water. Dark yellow urine and infrequent urination are warning signs. Water is essential for many bodily functions. Dizziness, fatigue, and headaches can indicate severe dehydration.",
                0.5,
            ]
        ],
        inputs=[question_input, context_input, threshold_slider],
    )

    submit_btn.click(
        fn=process_and_highlight,
        inputs=[question_input, context_input, threshold_slider],
        outputs=[output_display, metrics_display],
    )

if __name__ == "__main__":
    # share=True requests a public share link in addition to the local server.
    demo.launch(share=True)