EmmaScharfmann commited on
Commit ·
c33bcd6
1
Parent(s): bbfaed3
push to huggingface space
Browse files- .env +1 -0
- README.md +33 -13
- config.py +6 -0
- custom_css.py +125 -0
- default_values.py +9 -0
- interface.py +76 -0
- main.py +5 -0
- requirements.txt +3 -0
- similarity.py +57 -0
.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
HF_TOKEN=
|
README.md
CHANGED
|
@@ -1,13 +1,33 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
--
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🔍 Medical Document Search using sentence-transformers/embeddinggemma-300m-medical
|
| 2 |
+
|
| 3 |
+
A search tool specialized in the medical field that helps you find relevant information across your medical documents.
|
| 4 |
+
|
| 5 |
+
## How It Works
|
| 6 |
+
|
| 7 |
+
1. **Enter your question** in the reference sentence box (e.g., "Is Mr. Allen eligible for enrollment given his type 2 diabetes?")
|
| 8 |
+
2. **Add documents** to search through in the comparison sentence boxes
|
| 9 |
+
3. **Click "Search for the answer in your documents"** to see ranked results
|
| 10 |
+
4. **Review the scores**:
|
| 11 |
+
- 🟢 Green (≥0.70): High similarity - very relevant
|
| 12 |
+
- 🟠 Orange (0.50-0.69): Medium similarity - somewhat relevant
|
| 13 |
+
- 🟣 Purple (<0.50): Lower similarity - less relevant
|
| 14 |
+
|
| 15 |
+
## Installation
|
| 16 |
+
```bash
|
| 17 |
+
pip install -r requirements.txt
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## Setup
|
| 21 |
+
|
| 22 |
+
1. Get a Hugging Face API token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
|
| 23 |
+
2. Create a `.env` file in the project directory:
|
| 24 |
+
```
|
| 25 |
+
HF_TOKEN=your_token_here
|
| 26 |
+
```
|
| 27 |
+
3. Run the application:
|
| 28 |
+
```bash
|
| 29 |
+
python main.py
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
## Notes
|
| 33 |
+
Note that, for efficiency, a template of the interface code and the CSS code was generated with an LLM.
|
config.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
# Read key/value pairs from a .env file into the process environment.
load_dotenv()

# Hugging Face access token; resolves to None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
|
custom_css.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CUSTOM_CSS_FOR_INTERFACE = """
|
| 2 |
+
#title {
|
| 3 |
+
text-align: center;
|
| 4 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 5 |
+
color: white;
|
| 6 |
+
padding: 40px 20px;
|
| 7 |
+
border-radius: 16px;
|
| 8 |
+
margin-bottom: 30px;
|
| 9 |
+
}
|
| 10 |
+
#title h1 {
|
| 11 |
+
margin: 0;
|
| 12 |
+
font-size: 2.5em;
|
| 13 |
+
font-weight: 700;
|
| 14 |
+
}
|
| 15 |
+
#subtitle {
|
| 16 |
+
margin-top: 10px;
|
| 17 |
+
opacity: 0.95;
|
| 18 |
+
font-size: 1.1em;
|
| 19 |
+
}
|
| 20 |
+
.input-section {
|
| 21 |
+
background: #f7fafc;
|
| 22 |
+
padding: 24px;
|
| 23 |
+
border-radius: 12px;
|
| 24 |
+
border: 1px solid #e2e8f0;
|
| 25 |
+
}
|
| 26 |
+
#calc-btn {
|
| 27 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 28 |
+
border: none !important;
|
| 29 |
+
font-size: 16px !important;
|
| 30 |
+
font-weight: 600 !important;
|
| 31 |
+
padding: 12px 32px !important;
|
| 32 |
+
border-radius: 8px !important;
|
| 33 |
+
transition: transform 0.2s ease !important;
|
| 34 |
+
}
|
| 35 |
+
#calc-btn:hover {
|
| 36 |
+
transform: translateY(-2px);
|
| 37 |
+
box-shadow: 0 8px 16px rgba(102, 126, 234, 0.3) !important;
|
| 38 |
+
}
|
| 39 |
+
.add-btn {
|
| 40 |
+
background: #48bb78 !important;
|
| 41 |
+
border: none !important;
|
| 42 |
+
color: white !important;
|
| 43 |
+
font-weight: 600 !important;
|
| 44 |
+
border-radius: 8px !important;
|
| 45 |
+
margin-top: 12px !important;
|
| 46 |
+
}
|
| 47 |
+
.add-btn:hover {
|
| 48 |
+
background: #38a169 !important;
|
| 49 |
+
}
|
| 50 |
+
.comparison-box {
|
| 51 |
+
margin-bottom: 8px;
|
| 52 |
+
}
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
SIMILARITY_BARS = """
|
| 56 |
+
<style>
|
| 57 |
+
.similarity-container {
|
| 58 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 59 |
+
}
|
| 60 |
+
.similarity-item {
|
| 61 |
+
margin-bottom: 24px;
|
| 62 |
+
padding: 16px;
|
| 63 |
+
background: linear-gradient(to right, #f8f9fa 0%, #ffffff 100%);
|
| 64 |
+
border-radius: 12px;
|
| 65 |
+
border-left: 4px solid #667eea;
|
| 66 |
+
transition: transform 0.2s ease;
|
| 67 |
+
}
|
| 68 |
+
.similarity-item:hover {
|
| 69 |
+
transform: translateX(4px);
|
| 70 |
+
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.15);
|
| 71 |
+
}
|
| 72 |
+
.sentence-text {
|
| 73 |
+
font-size: 15px;
|
| 74 |
+
color: #2d3748;
|
| 75 |
+
margin-bottom: 12px;
|
| 76 |
+
line-height: 1.6;
|
| 77 |
+
font-weight: 500;
|
| 78 |
+
}
|
| 79 |
+
.bar-container {
|
| 80 |
+
display: flex;
|
| 81 |
+
align-items: center;
|
| 82 |
+
gap: 12px;
|
| 83 |
+
}
|
| 84 |
+
.progress-bar {
|
| 85 |
+
flex: 1;
|
| 86 |
+
height: 28px;
|
| 87 |
+
background: #e2e8f0;
|
| 88 |
+
border-radius: 14px;
|
| 89 |
+
overflow: hidden;
|
| 90 |
+
position: relative;
|
| 91 |
+
}
|
| 92 |
+
.progress-fill {
|
| 93 |
+
height: 100%;
|
| 94 |
+
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
|
| 95 |
+
border-radius: 14px;
|
| 96 |
+
transition: width 0.6s ease;
|
| 97 |
+
display: flex;
|
| 98 |
+
align-items: center;
|
| 99 |
+
justify-content: flex-end;
|
| 100 |
+
padding-right: 10px;
|
| 101 |
+
}
|
| 102 |
+
.score-badge {
|
| 103 |
+
background: #667eea;
|
| 104 |
+
color: white;
|
| 105 |
+
padding: 6px 16px;
|
| 106 |
+
border-radius: 20px;
|
| 107 |
+
font-weight: 600;
|
| 108 |
+
font-size: 14px;
|
| 109 |
+
min-width: 70px;
|
| 110 |
+
text-align: center;
|
| 111 |
+
box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
|
| 112 |
+
}
|
| 113 |
+
.rank-badge {
|
| 114 |
+
display: inline-block;
|
| 115 |
+
background: #f7fafc;
|
| 116 |
+
color: #4a5568;
|
| 117 |
+
padding: 4px 10px;
|
| 118 |
+
border-radius: 8px;
|
| 119 |
+
font-size: 12px;
|
| 120 |
+
font-weight: 600;
|
| 121 |
+
margin-right: 8px;
|
| 122 |
+
}
|
| 123 |
+
</style>
|
| 124 |
+
<div class="similarity-container">
|
| 125 |
+
"""
|
default_values.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sample documents used as the initial values of the document text boxes.
DEFAULT_DOCUMENTS = [
    "Patients with diabetes are not eligible for the study.",
    "Participants with moderate hepatic impairment received Drug 1 at a reduced dose of 100 mg once daily.",
    "Patients with a history of alcoholic liver disease within the past 5 years were excluded from participation in the study.",
    "Inclusion criteria required a liver biopsy performed within 6 months.",
    "Drug 1 was administered orally once daily for a total treatment duration of 48 weeks.",
]

# Sample question used as the initial value of the question text box.
DEFAULT_QUESTION = "Is Mr. Allen eligible for enrollment given his type 2 diabetes?"
|
interface.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from similarity import compute_similarity
|
| 3 |
+
from custom_css import CUSTOM_CSS_FOR_INTERFACE
|
| 4 |
+
from default_values import DEFAULT_DOCUMENTS, DEFAULT_QUESTION
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def build_interface():
    """Assemble the Gradio Blocks app and return it.

    The app contains a question box, 15 document boxes (the first 5 visible),
    a button that reveals further boxes via ``_add_comparison_box``, a search
    button wired to ``compute_similarity``, and an HTML output component.
    """
    # NOTE(review): the container nesting below was reconstructed from a
    # flattened copy of this file -- confirm it against the intended layout.
    total_boxes = 15
    initially_shown = 5

    with gr.Blocks(css=CUSTOM_CSS_FOR_INTERFACE, theme=gr.themes.Soft()) as demo:
        with gr.Column(elem_id="title"):
            gr.HTML("<h1>🔍 Smart Medical Search</h1>")
            gr.HTML("<h2> Ask questions, get instant answers from your documents</h2>")

        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes="input-section"):
                    reference_input = gr.Textbox(
                        label="📌 Enter your question about your medical documents:",
                        placeholder="Enter your question here...",
                        value=DEFAULT_QUESTION,
                        lines=3,
                        max_lines=5,
                    )

                    gr.HTML("""<h3> 🔍 Your medical documents:</h3>""")

                    comparison_container = gr.Column()
                    with comparison_container:
                        comparison_inputs = []
                        for position in range(total_boxes):
                            has_default = position < len(DEFAULT_DOCUMENTS)
                            comparison_inputs.append(
                                gr.Textbox(
                                    label=f"Sentence {position + 1}",
                                    placeholder="Enter a document to search...",
                                    value=DEFAULT_DOCUMENTS[position] if has_default else "",
                                    lines=2,
                                    elem_classes="comparison-box",
                                    # Only the first boxes start visible; the rest
                                    # are revealed one at a time by the add button.
                                    visible=position < initially_shown,
                                )
                            )

                    add_btn = gr.Button("➕ Add another document", elem_classes="add-btn", size="md")

                    # Tracks how many document boxes are currently shown.
                    visible_count = gr.State(initially_shown)

                    add_btn.click(
                        fn=_add_comparison_box,
                        inputs=[visible_count],
                        outputs=[visible_count] + comparison_inputs,
                    )

                submit_btn = gr.Button(
                    "Search for the answer in your documents",
                    variant="primary",
                    elem_id="calc-btn",
                    size="lg",
                )

            with gr.Column(scale=1):
                output = gr.HTML(label="Similarity Scores")

        # The question is passed first, followed by every document box.
        submit_btn.click(
            fn=compute_similarity,
            inputs=[reference_input] + comparison_inputs,
            outputs=output,
        )

    return demo
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _add_comparison_box(count: int, max_boxes: int = 15) -> list:
    """Reveal one more document box, up to ``max_boxes``.

    Args:
        count: Number of document boxes currently visible.
        max_boxes: Total number of document boxes in the interface. It must
            equal the number of box components wired as outputs of the click
            handler, since one update is returned per box.

    Returns:
        A list whose first item is the updated visible count, followed by one
        ``gr.update`` per box. When the limit is already reached the count is
        returned unchanged and every update is empty.
    """
    if count >= max_boxes:
        # Every box is already shown: return empty updates for all of them.
        return [count] + [gr.update() for _ in range(max_boxes)]

    new_count = count + 1
    return [new_count] + [
        gr.update(visible=index < new_count) for index in range(max_boxes)
    ]
|
main.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from interface import build_interface
|
| 2 |
+
|
| 3 |
+
if __name__ == "__main__":
    # Script entry point: build the Gradio app, then launch it.
    demo = build_interface()
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
huggingface_hub
|
| 3 |
+
python-dotenv
|
similarity.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import InferenceClient
|
| 2 |
+
from config import HF_TOKEN
|
| 3 |
+
from custom_css import SIMILARITY_BARS
|
| 4 |
+
|
| 5 |
+
# Module-level Hugging Face inference client, created once at import time
# with the "hf-inference" provider and HF_TOKEN as its API key.
client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _score_colors(score: float) -> tuple[str, str]:
    """Return the (accent color, bar gradient) CSS values for a similarity score."""
    if score >= 0.7:
        return "#48bb78", "linear-gradient(90deg, #48bb78 0%, #38a169 100%)"
    if score >= 0.5:
        return "#ed8936", "linear-gradient(90deg, #ed8936 0%, #dd6b20 100%)"
    return "#667eea", "linear-gradient(90deg, #667eea 0%, #764ba2 100%)"


def compute_similarity(reference_sentence: str, *comparison_sentences: str) -> str:
    """Rank comparison sentences by similarity to a reference and render HTML.

    Args:
        reference_sentence: The question/reference text to compare against.
        *comparison_sentences: Candidate texts; empty, whitespace-only and
            ``None`` entries are ignored, the rest are stripped.

    Returns:
        An HTML string: the ranked similarity bars on success, a warning
        ``<div>`` when the question or all comparison sentences are blank, or
        an error ``<div>`` when the similarity request (or rendering) fails.
        All user-supplied text and error text is HTML-escaped.
    """
    # Imported locally so this block does not rely on file-level import edits.
    from html import escape

    sentences_list = [s.strip() for s in comparison_sentences if s and s.strip()]

    if not sentences_list:
        message = "⚠️ Please enter at least one comparison sentence."
        return f"<div style='color: #e74c3c; padding: 20px; text-align: center;'>{message}</div>"

    if not reference_sentence or not reference_sentence.strip():
        # Do not spend an API call on a blank question.
        message = "⚠️ Please enter a question."
        return f"<div style='color: #e74c3c; padding: 20px; text-align: center;'>{message}</div>"

    try:
        result = client.sentence_similarity(
            sentence=reference_sentence,
            other_sentences=sentences_list,
            model="sentence-transformers/embeddinggemma-300m-medical",
        )
        # Highest score first.
        ranked = sorted(zip(sentences_list, result), key=lambda pair: pair[1], reverse=True)

        parts = [SIMILARITY_BARS]
        for idx, (sentence, score) in enumerate(ranked, 1):
            border_color, gradient = _score_colors(score)
            # Scores may fall outside [0, 1] (e.g. a negative similarity);
            # clamp so the bar width is always a valid CSS percentage.
            percentage = max(0.0, min(100.0, score * 100))
            # Escape the sentence: it is user input rendered as raw HTML.
            safe_sentence = escape(sentence)
            parts.append(f"""
            <div class="similarity-item" style="border-left-color: {border_color};">
                <div class="sentence-text">
                    <span class="rank-badge">#{idx}</span>{safe_sentence}
                </div>
                <div class="bar-container">
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: {percentage:.2f}%; background: {gradient};"></div>
                    </div>
                    <div class="score-badge" style="background: {border_color};">{score:.4f}</div>
                </div>
            </div>
            """)
        # Closes the container <div> opened at the end of SIMILARITY_BARS.
        parts.append("</div>")
        return "".join(parts)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing the
        # handler. The message is escaped because it may echo user/remote text.
        return f"<div style='color: #e74c3c; padding: 20px; background: #fee; border-radius: 8px; border-left: 4px solid #e74c3c;'><strong>Error:</strong> {escape(str(e))}</div>"
|