shahidul034
/

readctrl

Model card Files Files and versions

readctrl / code /RL_model /unsloth_rl /highlighter.py

shahidul034's picture

Add files using upload-large-folder tool

c7a6fe6 verified 28 days ago

history blame contribute delete

4.01 kB

	import gradio as gr
	from transformers import AutoModel
	import torch

	# 1. Load the semantic-highlight model once, at import time (the
	#    transformers and torch packages must be installed).
	_MODEL_ID = "zilliz/semantic-highlight-bilingual-v1"

	print("Loading model... This may take a moment.")
	# NOTE(review): trust_remote_code=True permits Python code shipped with the
	# model repository to run locally; presumably required for the custom
	# `process` method used later in this file -- confirm the repo is trusted.
	model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=True)

	# Inline CSS applied to every highlighted sentence (bright yellow marker).
	_HIGHLIGHT_STYLE = (
	    "background-color: #fff176; color: #000; padding: 2px; "
	    "border-radius: 3px; font-weight: 500;"
	)

	# Inline CSS for the container that wraps the rendered context.
	_CONTAINER_STYLE = (
	    "font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; "
	    "line-height: 1.8; font-size: 16px; color: #333;"
	)


	def _find_highlight_spans(context, sentences):
	    """Return sorted, non-overlapping ``(start, end)`` spans of *sentences* in *context*.

	    Every occurrence of every non-empty sentence is located in the original
	    text. Overlapping spans (for example one sentence contained in another)
	    are merged so that no character is highlighted twice.
	    """
	    spans = []
	    for sentence in set(sentences):
	        if not sentence:
	            # An empty string matches at every offset; nothing to highlight.
	            continue
	        start = context.find(sentence)
	        while start != -1:
	            end = start + len(sentence)
	            spans.append((start, end))
	            start = context.find(sentence, end)

	    spans.sort()
	    merged = []
	    for start, end in spans:
	        if merged and start < merged[-1][1]:
	            prev_start, prev_end = merged[-1]
	            merged[-1] = (prev_start, max(prev_end, end))
	        else:
	            merged.append((start, end))
	    return merged


	def _render_highlighted_html(context, sentences):
	    """Return *context* as escaped HTML with *sentences* wrapped in highlight spans."""
	    import html  # Local import keeps this block self-contained.

	    parts = []
	    cursor = 0
	    for start, end in _find_highlight_spans(context, sentences):
	        # Escape user text so it is displayed literally, never parsed as markup.
	        parts.append(html.escape(context[cursor:start]))
	        parts.append(
	            f'<span style="{_HIGHLIGHT_STYLE}">'
	            f"{html.escape(context[start:end])}</span>"
	        )
	        cursor = end
	    parts.append(html.escape(context[cursor:]))
	    return "".join(parts)


	def process_and_highlight(question, context, threshold):
	    """Highlight the sentences of *context* that the model selects for *question*.

	    Args:
	        question: The question text entered by the user.
	        context: The full text to search for relevant sentences.
	        threshold: Value forwarded to ``model.process`` as ``threshold``.

	    Returns:
	        A ``(html, metrics)`` tuple: ``html`` is the context rendered as HTML
	        with the model's ``highlighted_sentences`` wrapped in styled spans, and
	        ``metrics`` is one formatted line per entry of the model's
	        ``sentence_probabilities`` (or a placeholder when that key is absent).
	        When either input is empty, ``html`` carries a prompt message and
	        ``metrics`` is an empty string.
	    """
	    if not question or not context:
	        # The callback feeds two output components, so two values must be
	        # returned on the early-exit path as well.
	        return "Please provide both a question and context.", ""

	    # 2. Run the model inference
	    result = model.process(
	        question=question,
	        context=context,
	        threshold=threshold,
	        return_sentence_metrics=True,
	    )

	    # 3. Create the highlighted HTML output. Spans are located in the
	    #    original text and rendered in a single pass, so inserted markup is
	    #    never re-scanned and nested or duplicate highlights cannot occur.
	    highlighted_sentences = result.get("highlighted_sentences") or []
	    output_html = _render_highlighted_html(context, highlighted_sentences)

	    # Wrap in a container for better typography
	    final_output = f'<div style="{_CONTAINER_STYLE}">\n{output_html}\n</div>'

	    # 4. Format metrics for the display
	    probabilities = result.get("sentence_probabilities")
	    if probabilities is None:
	        metrics_str = "No specific probabilities returned."
	    else:
	        metrics_str = "\n".join(f"• {p:.4f}" for p in probabilities)

	    return final_output, metrics_str

	# 5. Build the Gradio UI: inputs in the left column, results in the right.
	with gr.Blocks(theme=gr.themes.Soft(), title="Semantic Highlighter") as demo:
	    gr.Markdown("# 🔍 Semantic Highlight Explorer")
	    gr.Markdown("Identify and highlight parts of a text that answer a specific question using the Zilliz bilingual model.")

	    with gr.Row():
	        with gr.Column(scale=1):
	            question_input = gr.Textbox(
	                label="Question",
	                placeholder="e.g., What are the symptoms of dehydration?",
	                lines=2,
	            )
	            context_input = gr.Textbox(
	                label="Context / Full Text",
	                placeholder="Paste the document text here...",
	                lines=10,
	            )
	            threshold_slider = gr.Slider(
	                minimum=0.1,
	                maximum=1.0,
	                value=0.5,
	                step=0.05,
	                label="Confidence Threshold",
	            )
	            submit_btn = gr.Button("Analyze & Highlight", variant="primary")

	        with gr.Column(scale=1):
	            # A Markdown heading is used for the static title; gr.Label is a
	            # classification-output component, not a caption widget.
	            gr.Markdown("### Highlighted Result")
	            output_display = gr.HTML()

	            with gr.Accordion("Sentence Metrics", open=False):
	                metrics_display = gr.Textbox(label="Probabilities", lines=5)

	    # The examples and the click handler feed the same three components, in
	    # the order of process_and_highlight's parameters.
	    highlight_inputs = [question_input, context_input, threshold_slider]

	    # Pre-filled example row that populates the inputs when selected.
	    gr.Examples(
	        examples=[
	            [
	                "What are the symptoms of dehydration?",
	                "Dehydration occurs when your body loses more fluid than you take in. Common signs include feeling thirsty and having a dry mouth. The human body is composed of about 60% water. Dark yellow urine and infrequent urination are warning signs. Water is essential for many bodily functions. Dizziness, fatigue, and headaches can indicate severe dehydration.",
	                0.5,
	            ],
	        ],
	        inputs=highlight_inputs,
	    )

	    # The callback's two return values map onto these two outputs, in order.
	    submit_btn.click(
	        fn=process_and_highlight,
	        inputs=highlight_inputs,
	        outputs=[output_display, metrics_display],
	    )

	# Launch the demo only when this file is executed directly as a script.
	if __name__ == "__main__":
	    # NOTE(review): share=True presumably requests a publicly reachable
	    # Gradio share link -- confirm that public exposure is intended.
	    demo.launch(share=True)