shahidul034's picture
Add files using upload-large-folder tool
c7a6fe6 verified
import html

import gradio as gr
import torch
from transformers import AutoModel
# 1. Load the model once at import time (requires transformers and torch).
# The checkpoint ships its own modelling code, hence trust_remote_code=True.
MODEL_CHECKPOINT = "zilliz/semantic-highlight-bilingual-v1"

print("Loading model... This may take a moment.")
model = AutoModel.from_pretrained(MODEL_CHECKPOINT, trust_remote_code=True)
# Inline CSS applied to every highlighted sentence (bright yellow marker).
_HIGHLIGHT_STYLE = (
    "background-color: #fff176; color: #000; padding: 2px; "
    "border-radius: 3px; font-weight: 500;"
)


def _find_highlight_spans(context, sentences):
    """Return sorted, non-overlapping (start, end) ranges of *sentences* in *context*."""
    seen = set()
    spans = []
    for sentence in sentences:
        # Skip empty/duplicate/non-string entries: an empty needle would
        # match at every position and duplicates add nothing.
        if not isinstance(sentence, str) or not sentence or sentence in seen:
            continue
        seen.add(sentence)
        start = context.find(sentence)
        while start != -1:
            end = start + len(sentence)
            spans.append((start, end))
            start = context.find(sentence, end)

    # Merge overlapping ranges so a sentence contained in another one never
    # produces nested or interleaved <span> tags.
    spans.sort()
    merged = []
    for start, end in spans:
        if merged and start < merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))
    return merged


def _render_highlighted_html(context, spans):
    """Return *context* as escaped HTML with each range in *spans* wrapped in a styled span."""
    pieces = []
    cursor = 0
    for start, end in spans:
        pieces.append(html.escape(context[cursor:start]))
        pieces.append(
            f'<span style="{_HIGHLIGHT_STYLE}">'
            f"{html.escape(context[start:end])}</span>"
        )
        cursor = end
    pieces.append(html.escape(context[cursor:]))
    return "".join(pieces)


def process_and_highlight(question: str, context: str, threshold: float) -> tuple:
    """Highlight the sentences of *context* that the model selects for *question*.

    Args:
        question: The user's question.
        context: The full text to search for relevant sentences.
        threshold: Threshold value forwarded unchanged to ``model.process``.

    Returns:
        A 2-tuple ``(html_output, metrics_text)``. ``html_output`` is the
        HTML-escaped context with the selected sentences wrapped in styled
        ``<span>`` tags; ``metrics_text`` lists one probability per line when
        the result contains ``"sentence_probabilities"``. If either input is
        missing or blank, the tuple holds a prompt message and an empty string.
    """
    # Always return two values: the click handler is bound to two outputs.
    if not (question and question.strip()) or not (context and context.strip()):
        return "Please provide both a question and context.", ""

    # 2. Run the model inference
    result = model.process(
        question=question,
        context=context,
        threshold=threshold,
        return_sentence_metrics=True,
    )
    highlighted_sentences = result.get("highlighted_sentences") or []

    # 3. Create the highlighted HTML output.
    # Locate the sentences in the *original* text and build the markup in a
    # single pass, so inserted tags are never re-matched and user-supplied
    # text is escaped before it reaches the HTML component.
    spans = _find_highlight_spans(context, highlighted_sentences)
    output_html = _render_highlighted_html(context, spans)

    # Wrap in a container for better typography
    final_output = (
        "\n"
        "<div style=\"font-family: 'Segoe UI', Tahoma, Geneva, Verdana, "
        'sans-serif; line-height: 1.8; font-size: 16px; color: #333;">\n'
        f"{output_html}\n"
        "</div>\n"
    )

    # 4. Format metrics for the display
    metrics_str = "No specific probabilities returned."
    if "sentence_probabilities" in result:
        metrics_str = "\n".join(
            f"• {p:.4f}" for p in result["sentence_probabilities"]
        )
    return final_output, metrics_str
# 5. Assemble the Gradio interface.
# Sample row (question, context, threshold) offered through gr.Examples.
EXAMPLE_QUESTION = "What are the symptoms of dehydration?"
EXAMPLE_CONTEXT = (
    "Dehydration occurs when your body loses more fluid than you take in. "
    "Common signs include feeling thirsty and having a dry mouth. "
    "The human body is composed of about 60% water. "
    "Dark yellow urine and infrequent urination are warning signs. "
    "Water is essential for many bodily functions. "
    "Dizziness, fatigue, and headaches can indicate severe dehydration."
)
EXAMPLE_THRESHOLD = 0.5

with gr.Blocks(theme=gr.themes.Soft(), title="Semantic Highlighter") as demo:
    gr.Markdown("# 🔍 Semantic Highlight Explorer")
    gr.Markdown("Identify and highlight parts of a text that answer a specific question using the Zilliz bilingual model.")

    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column(scale=1):
            question_input = gr.Textbox(label="Question", placeholder="e.g., What are the symptoms of dehydration?", lines=2)
            context_input = gr.Textbox(label="Context / Full Text", placeholder="Paste the document text here...", lines=10)
            threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.05, label="Confidence Threshold")
            submit_btn = gr.Button("Analyze & Highlight", variant="primary")

        # Right column: rendered highlights plus a collapsible metrics box.
        with gr.Column(scale=1):
            gr.Label("Highlighted Result")
            output_display = gr.HTML()
            with gr.Accordion("Sentence Metrics", open=False):
                metrics_display = gr.Textbox(label="Probabilities", lines=5)

    # The same three components feed both the examples table and the handler.
    handler_inputs = [question_input, context_input, threshold_slider]

    gr.Examples(
        examples=[[EXAMPLE_QUESTION, EXAMPLE_CONTEXT, EXAMPLE_THRESHOLD]],
        inputs=handler_inputs,
    )

    # Clicking the button runs the highlighter and fills both output widgets.
    submit_btn.click(
        fn=process_and_highlight,
        inputs=handler_inputs,
        outputs=[output_display, metrics_display],
    )

# Start the app only when executed as a script; share=True is passed to launch().
if __name__ == "__main__":
    demo.launch(share=True)