EmmaScharfmann commited on
Commit
c33bcd6
·
1 Parent(s): bbfaed3

push to huggingface space

Browse files
Files changed (9) hide show
  1. .env +1 -0
  2. README.md +33 -13
  3. config.py +6 -0
  4. custom_css.py +125 -0
  5. default_values.py +9 -0
  6. interface.py +76 -0
  7. main.py +5 -0
  8. requirements.txt +3 -0
  9. similarity.py +57 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ HF_TOKEN=
README.md CHANGED
@@ -1,13 +1,33 @@
1
- ---
2
- title: MedicalSearchModel
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.5.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: Interactive tool to test "embeddinggemma-300m-medical"
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔍 Medical Document Search using sentence-transformers/embeddinggemma-300m-medical
2
+
3
+ A search tool specialized in the medical field that helps you find relevant information across your medical documents.
4
+
5
+ ## How It Works
6
+
7
+ 1. **Enter your question** in the reference sentence box (e.g., "Is Mr. Allen eligible for enrollment given his type 2 diabetes?")
8
+ 2. **Add documents** to search through in the comparison sentence boxes
9
+ 3. **Click "Calculate Similarity"** to see ranked results
10
+ 4. **Review the scores**:
11
+ - 🟢 Green (≥0.70): High similarity - very relevant
12
+ - 🟠 Orange (0.50-0.69): Medium similarity - somewhat relevant
13
+ - 🟣 Purple (<0.50): Lower similarity - less relevant
14
+
15
+ ## Installation
16
+ ```bash
17
+ pip install -r requirements.txt
18
+ ```
19
+
20
+ ## Setup
21
+
22
+ 1. Get a Hugging Face API token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
23
+ 2. Create a `.env` file in the project directory:
24
+ ```
25
+ HF_TOKEN=your_token_here
26
+ ```
27
+ 3. Run the application:
28
+ ```bash
29
+ python main.py
30
+ ```
31
+
32
+ ## Notes
33
+ Note: for efficiency, a template of the interface code and the CSS code was generated with an LLM.
config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
# Merge variables from a local .env file (when one exists) into the process
# environment before any setting is read.
load_dotenv()

# Hugging Face API token; None when the HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
custom_css.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUSTOM_CSS_FOR_INTERFACE = """
2
+ #title {
3
+ text-align: center;
4
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
5
+ color: white;
6
+ padding: 40px 20px;
7
+ border-radius: 16px;
8
+ margin-bottom: 30px;
9
+ }
10
+ #title h1 {
11
+ margin: 0;
12
+ font-size: 2.5em;
13
+ font-weight: 700;
14
+ }
15
+ #subtitle {
16
+ margin-top: 10px;
17
+ opacity: 0.95;
18
+ font-size: 1.1em;
19
+ }
20
+ .input-section {
21
+ background: #f7fafc;
22
+ padding: 24px;
23
+ border-radius: 12px;
24
+ border: 1px solid #e2e8f0;
25
+ }
26
+ #calc-btn {
27
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
28
+ border: none !important;
29
+ font-size: 16px !important;
30
+ font-weight: 600 !important;
31
+ padding: 12px 32px !important;
32
+ border-radius: 8px !important;
33
+ transition: transform 0.2s ease !important;
34
+ }
35
+ #calc-btn:hover {
36
+ transform: translateY(-2px);
37
+ box-shadow: 0 8px 16px rgba(102, 126, 234, 0.3) !important;
38
+ }
39
+ .add-btn {
40
+ background: #48bb78 !important;
41
+ border: none !important;
42
+ color: white !important;
43
+ font-weight: 600 !important;
44
+ border-radius: 8px !important;
45
+ margin-top: 12px !important;
46
+ }
47
+ .add-btn:hover {
48
+ background: #38a169 !important;
49
+ }
50
+ .comparison-box {
51
+ margin-bottom: 8px;
52
+ }
53
+ """
54
+
55
+ SIMILARITY_BARS = """
56
+ <style>
57
+ .similarity-container {
58
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
59
+ }
60
+ .similarity-item {
61
+ margin-bottom: 24px;
62
+ padding: 16px;
63
+ background: linear-gradient(to right, #f8f9fa 0%, #ffffff 100%);
64
+ border-radius: 12px;
65
+ border-left: 4px solid #667eea;
66
+ transition: transform 0.2s ease;
67
+ }
68
+ .similarity-item:hover {
69
+ transform: translateX(4px);
70
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.15);
71
+ }
72
+ .sentence-text {
73
+ font-size: 15px;
74
+ color: #2d3748;
75
+ margin-bottom: 12px;
76
+ line-height: 1.6;
77
+ font-weight: 500;
78
+ }
79
+ .bar-container {
80
+ display: flex;
81
+ align-items: center;
82
+ gap: 12px;
83
+ }
84
+ .progress-bar {
85
+ flex: 1;
86
+ height: 28px;
87
+ background: #e2e8f0;
88
+ border-radius: 14px;
89
+ overflow: hidden;
90
+ position: relative;
91
+ }
92
+ .progress-fill {
93
+ height: 100%;
94
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
95
+ border-radius: 14px;
96
+ transition: width 0.6s ease;
97
+ display: flex;
98
+ align-items: center;
99
+ justify-content: flex-end;
100
+ padding-right: 10px;
101
+ }
102
+ .score-badge {
103
+ background: #667eea;
104
+ color: white;
105
+ padding: 6px 16px;
106
+ border-radius: 20px;
107
+ font-weight: 600;
108
+ font-size: 14px;
109
+ min-width: 70px;
110
+ text-align: center;
111
+ box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
112
+ }
113
+ .rank-badge {
114
+ display: inline-block;
115
+ background: #f7fafc;
116
+ color: #4a5568;
117
+ padding: 4px 10px;
118
+ border-radius: 8px;
119
+ font-size: 12px;
120
+ font-weight: 600;
121
+ margin-right: 8px;
122
+ }
123
+ </style>
124
+ <div class="similarity-container">
125
+ """
default_values.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Example documents used to pre-fill the first document textboxes of the UI.
DEFAULT_DOCUMENTS = [
    "Patients with diabetes are not eligible for the study.",
    "Participants with moderate hepatic impairment received Drug 1 at a reduced dose of 100 mg once daily.",
    "Patients with a history of alcoholic liver disease within the past 5 years were excluded from participation in the study.",
    "Inclusion criteria required a liver biopsy performed within 6 months.",
    "Drug 1 was administered orally once daily for a total treatment duration of 48 weeks.",
]

# Example question used to pre-fill the question textbox of the UI.
DEFAULT_QUESTION = "Is Mr. Allen eligible for enrollment given his type 2 diabetes?"
interface.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from similarity import compute_similarity
3
+ from custom_css import CUSTOM_CSS_FOR_INTERFACE
4
+ from default_values import DEFAULT_DOCUMENTS, DEFAULT_QUESTION
5
+
6
+
7
def build_interface():
    """Build and return the Gradio interface.

    The page has a title banner followed by a two-column row: the left column
    holds the question textbox, the document textboxes (a fixed pool of which
    only the first few are visible), an "add document" button and the search
    button; the right column holds the HTML area that shows the ranked
    similarity results.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    # Size of the textbox pool and how many of them are shown at start-up.
    # The pool size has to agree with the limit used by _add_comparison_box,
    # which returns one update per textbox.
    max_documents = 15
    initially_visible = 5

    # NOTE(review): recent Gradio major versions may expect `css`/`theme` to be
    # passed to `launch()` instead of `Blocks()` — confirm against the Gradio
    # version this app is deployed with.
    with gr.Blocks(css=CUSTOM_CSS_FOR_INTERFACE, theme=gr.themes.Soft()) as demo:
        with gr.Column(elem_id="title"):
            gr.HTML("<h1>🔍 Smart Medical Search</h1>")
            gr.HTML("<h2> Ask questions, get instant answers from your documents</h2>")

        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes="input-section"):
                    reference_input = gr.Textbox(
                        label="📌 Enter your question about your medical documents:",
                        placeholder="Enter your question here...",
                        value=DEFAULT_QUESTION,
                        lines=3,
                        max_lines=5,
                    )

                    gr.HTML("""<h3> 🔍 Your medical documents:</h3>""")

                    comparison_container = gr.Column()

                    with comparison_container:
                        comparison_inputs = []
                        for i in range(max_documents):
                            # Pre-fill with the default documents, then leave
                            # the remaining boxes empty.
                            value = DEFAULT_DOCUMENTS[i] if i < len(DEFAULT_DOCUMENTS) else ""
                            comparison_box = gr.Textbox(
                                label=f"Sentence {i + 1}",
                                placeholder="Enter a document to search...",
                                value=value,
                                lines=2,
                                elem_classes="comparison-box",
                                visible=i < initially_visible,
                            )
                            comparison_inputs.append(comparison_box)

                    add_btn = gr.Button("➕ Add another document", elem_classes="add-btn", size="md")

                    # Per-session counter of how many textboxes are currently shown.
                    visible_count = gr.State(initially_visible)

                    add_btn.click(
                        fn=_add_comparison_box,
                        inputs=[visible_count],
                        outputs=[visible_count] + comparison_inputs,
                    )

                submit_btn = gr.Button(
                    "Search for the answer in your documents",
                    variant="primary",
                    elem_id="calc-btn",
                    size="lg",
                )

            with gr.Column(scale=1):
                output = gr.HTML(label="Similarity Scores")

        # The question is passed first, followed by every document textbox
        # (hidden ones included), matching compute_similarity's signature.
        submit_btn.click(
            fn=compute_similarity,
            inputs=[reference_input] + comparison_inputs,
            outputs=output,
        )

    return demo
66
+
67
+
68
def _add_comparison_box(count: int, max_boxes: int = 15) -> list:
    """Reveal one more document textbox, up to ``max_boxes``.

    Args:
        count: Number of textboxes currently visible.
        max_boxes: Total number of textboxes in the pool; one update is
            returned for each of them.

    Returns:
        A list whose first item is the new visible count and whose remaining
        ``max_boxes`` items are ``gr.update`` payloads, one per textbox.
    """
    if count >= max_boxes:
        # Already at the limit: keep the counter and leave every box untouched.
        return [count] + [gr.update() for _ in range(max_boxes)]

    new_count = count + 1
    visibility = [gr.update(visible=i < new_count) for i in range(max_boxes)]
    return [new_count] + visibility
main.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from interface import build_interface
2
+
3
if __name__ == "__main__":
    # Build the Gradio app and start serving it when run as a script.
    build_interface().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ python-dotenv
similarity.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient
2
+ from config import HF_TOKEN
3
+ from custom_css import SIMILARITY_BARS
4
+
5
# Inference client shared by every request; created once at import time using
# the token read by the config module.
client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)
9
+
10
+
11
+
12
def compute_similarity(reference_sentence: str, *comparison_sentences) -> str:
    """Rank documents by similarity to a question and render the result as HTML.

    Args:
        reference_sentence: The question to compare every document against.
        *comparison_sentences: Candidate documents; ``None``, empty and
            whitespace-only entries are ignored.

    Returns:
        An HTML fragment: one coloured score bar per document, sorted from
        most to least similar, or a styled message when input is missing or
        the similarity request fails. This function does not raise; failures
        are reported in the returned HTML.
    """
    import html  # local import so the block does not depend on file-level imports

    sentences_list = [s.strip() for s in comparison_sentences if s and s.strip()]

    if not sentences_list:
        missing_reference_documents_message = "⚠️ Please enter at least one comparison sentence."
        return f"<div style='color: #e74c3c; padding: 20px; text-align: center;'>{missing_reference_documents_message}</div>"

    # A blank question cannot be ranked against anything; report it instead of
    # sending an empty sentence to the inference API.
    if not reference_sentence or not reference_sentence.strip():
        missing_question_message = "⚠️ Please enter a question."
        return f"<div style='color: #e74c3c; padding: 20px; text-align: center;'>{missing_question_message}</div>"

    try:
        result = client.sentence_similarity(
            sentence=reference_sentence,
            other_sentences=sentences_list,
            model="sentence-transformers/embeddinggemma-300m-medical",
        )
        sorted_results = sorted(zip(sentences_list, result), key=lambda x: x[1], reverse=True)

        parts = [SIMILARITY_BARS]
        for idx, (sentence, score) in enumerate(sorted_results, 1):
            # Clamp so that a negative (or >1) score still yields a valid CSS width.
            percentage = min(max(score * 100, 0.0), 100.0)
            if score >= 0.7:
                border_color = "#48bb78"
                gradient = "linear-gradient(90deg, #48bb78 0%, #38a169 100%)"
            elif score >= 0.5:
                border_color = "#ed8936"
                gradient = "linear-gradient(90deg, #ed8936 0%, #dd6b20 100%)"
            else:
                border_color = "#667eea"
                gradient = "linear-gradient(90deg, #667eea 0%, #764ba2 100%)"

            # Documents are user-supplied text: escape them so markup typed
            # into a textbox is displayed literally rather than interpreted.
            safe_sentence = html.escape(sentence)

            parts.append(f"""
            <div class="similarity-item" style="border-left-color: {border_color};">
                <div class="sentence-text">
                    <span class="rank-badge">#{idx}</span>{safe_sentence}
                </div>
                <div class="bar-container">
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: {percentage:.2f}%; background: {gradient};"></div>
                    </div>
                    <div class="score-badge" style="background: {border_color};">{score:.4f}</div>
                </div>
            </div>
            """)
        # Closes the container <div> opened at the end of SIMILARITY_BARS.
        parts.append("</div>")
        return "".join(parts)
    except Exception as e:
        # UI boundary: show the failure in the page instead of crashing the
        # handler. The message is escaped because it may contain arbitrary text.
        return f"<div style='color: #e74c3c; padding: 20px; background: #fee; border-radius: 8px; border-left: 4px solid #e74c3c;'><strong>Error:</strong> {html.escape(str(e))}</div>"