sumit4352 committed on
Commit
ae4fa62
·
verified ·
1 Parent(s): 98f1b36

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.py +104 -0
  2. main.py +332 -0
config.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Keyword groups used to classify sentences extracted from biomedical papers.
# Keys double as the preset choices shown in the UI; values are matched as
# case-insensitive substrings (or compiled into whole-word regexes below).
# Entries deliberately padded with spaces (e.g. " man ") force whole-word-ish
# matching when used as raw substrings.
keywords = {
    "Gender": ["male", "female", " man ", "woman", " men ", " men,", "women", "boy", "girl", "males", "females"],
    "Age": [" age ", "age,", " aged ", "years old", "year-old", "year olds", "elderly", "adults", "young", "youth"],
    "Patients": ["patient", "patients", "case", "cases", "subject", "subjects", "individual", "individuals"],
    "Participants": ["participant", "participants", "attendee", "attendees", "respondent", "respondents"],
    "Inclusion Criteria": ["inclusion", "eligibility criteria", "study inclusion", "included"],
    "Exclusion Criteria": ["exclusion", "not eligible", "study exclusion", "excluded"],
    "Study Types": [
        "Case Report", "Case Series", "Cross-sectional Study", "Case-Control Study", "Cohort Study", "Randomized Controlled Clinical Trial",
        "Non-Randomized Controlled Trial", "Pilot Study", "Feasibility Study", "Longitudinal Study", "Retrospective Study", "Prospective Study",
        "Observational Study", "Experimental Study", "Interventional Study", "Descriptive Study", "Analytical Study", "Quasi-Experimental Study",
        "Epidemiological Study", "Ecological Study", "Systematic Review", "Meta-Analysis", "Mixed-Methods Study", "Narrative Review", "Scoping Review",
        "Rapid Review", "Umbrella Review", "Diagnostic Accuracy Study", "Validation Study", "Genome-Wide Association Study (GWAS)",
        "Gene-Environment Interaction Study", "Linkage Study", "Sensitivity/Specificity Study", "Cost-Effectiveness Study", "Health Technology Assessment",
        "Quality Improvement Study", "Translational Research", "Implementation Science Study", "Psychometric Study", "Community-Based Participatory Research (CBPR)",
        "In Vitro Study", "In Vivo Study", "Simulation Study", "Phenomenological Study", "Ethnographic Study", "Grounded Theory Study", "Narrative Study",
        "Case Study", "Pragmatic Trial", "Cluster Randomized Trial", "Adaptive Trial", "Phase 1 Clinical Trial", "Phase 2 Clinical Trial", "Phase 3 Clinical Trial",
        "Phase 4 Clinical Trial", "Real-World Evidence Study", "Comparative Effectiveness Study", "Proof-of-Concept Study", "Dose-Response Study", "Cross-Over Study",
        "Nested Study", "Multicenter Study", "Delphi Study", "Pragmatic Clinical Trial", "Registry-Based Study", "Historical Cohort Study",
        "Nested Case-Control Study", " double-blind ", "double blind", "placebo-controlled", "placebo controlled", "Cross-sectional analysis"
    ],
    # Hyphenated variants cover words broken across lines in PDF extraction.
    # (Duplicates and the space-padded variant were removed: substring matching
    # on "comorbidities" already covers them.)
    "Co-morbidities": ["comorbidities", "co-morbidities", "comor-bidities"],
    "Country": ["Afghanistan", "Australia", "Brazil", "Canada", "China", "France", "Germany", "India", "Japan", "Mexico", "Nigeria", "Russia",
                "South Africa", "United Kingdom", "United States", "Prefer Not to Answer"],
    "Race/Ethnicity": ["white", "Black", "African American", "Asian", "Native Hawaiian", "Other Pacific Islander", "American Indian",
                       "Alaska Native", "Other Race", "Two or More Races", "Hispanic", "latino", "Not Hispanic or latino"],
    "Follow-Up": ["years", "year", "weeks", "week", "months", "month", "days", "day"],
    "Remark": [
        "displayed", "exhibited", "revealed", "indicated", "illustrated", "Showed",
        "noticed", "perceived", "detected", "discerned", "identified", "Observed",
        "progress", "enhancement", "advancement", "growth", "betterment", "Improvement",
        "proved", "showcased", "conveyed", "validated", "Demonstrated",
        "similar", "equivalent", "parallel", "analogous", "akin", "Comparable",
        "more secure", "less risky", "protected", "shielded", "guarded", "Safer",
        "chosen", "picked", "opted", "designated", "elected", "Selected"
    ],
    # "intervention grorup" was a typo and could never match real text;
    # fixed to the intended singular form.
    "Intervention Groups": [
        "intervention group", "intervention groups", "treatment groups", "treatment group", "control groups", "control group", "placebo group",
        "placebo groups"
    ],
    "Outcomes": [
        "results", "findings", "observations", "conclusion", "outcome", "clinical outcome", "results:",
        "efficacy", "effectiveness", "treatment response", "pain reduction", "symptom improvement",
        "disease progression", "treatment success", "remission rate", "response rate", "conclusion:",
        "adverse effects", "side effects", "complications", "recurrence", "recovery time", "result:",
        "statistical significance", "p-value", "confidence interval", "hazard ratio", "risk reduction"
    ],
    "Assessment Tools": [
        "Visual Analog Scale (VAS)", "WOMAC", "Western Ontario and McMaster Universities Osteoarthritis Index",
        "Numeric Rating Scale (NRS)", "McGill Pain Questionnaire (MPQ)", "Timed Up and Go Test (TUG)",
        "6-Minute Walk Test (6MWT)", "gait analysis", "joint range of motion", "functional independence measure",
        "SF-36", "EQ-5D", "Beck Depression Inventory", "Hospital Anxiety and Depression Scale (HADS)",
        "blood tests", "CRP", "C-reactive protein", "ESR", "TNF-α", "IL-6", "synovial fluid analysis",
        "X-ray", "Kellgren-Lawrence grade", "MRI", "magnetic resonance imaging", "musculoskeletal ultrasound",
        "cartilage thickness", "bone marrow lesion", "mental health scales", "quality of life assessments", "ELISA"
    ]
}
61
+
62
# Author name regex pattern: optional initials ("J. "), then one or more
# capitalised words (allowing dots/hyphens/apostrophes), optionally followed
# by an affiliation number.
author_pattern = r'\b(?:[A-Z]\.\s*)*[A-Z][a-zA-Z\.\-\']+(?:\s[A-Z][a-zA-Z\.\-\']+)*\b(?:\s[0-9]+)?'

# Words that regex/NER author extraction must ignore (stop-words, months,
# institution words, etc.). Stored in LOWERCASE because extract_authors()
# compares candidates with name.lower() — the previous capitalised entries
# ("Aim", "University", ...) could never match and were effectively dead.
exclude_words = {
    "aim", "this", "the", "article", "school", "topical", "with", "compress",
    "research", "capsi", "india", "australia", "and", "others", "january",
    "february", "march", "april", "may", "june", "july", "august",
    "september", "october", "november", "december", "monday", "tuesday",
    "wednesday", "thursday", "friday", "saturday", "sunday", "am", "pm",
    "university", "college", "institute",
    "of", "in", "on", "at", "by", "for", "about", "against", "between",
    "into", "through", "during", "before", "after", "above", "below", "to",
    "from", "up", "down", "out", "over", "under", "again", "further", "then",
    "once", "here", "there", "when", "where", "why", "how", "all", "any",
    "both", "each", "few", "more", "most", "other", "some", "such", "no",
    "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s",
    "t", "can", "will", "just", "don", "should", "now", "ginger", "migraine"
}
80
+
81
# Regex patterns
# Numbers: optional sign, decimals, optional %, ranges like "10-20%", and
# space-separated digit runs.
numeric_regex = re.compile(r"\b(?:-?\d+\.?\d*%?|\d+-\d+%?|\d+(?: \d+)*%?)\b")
# Numbers fully wrapped in brackets/parentheses, e.g. "(2020)" or "[1,2]" —
# treated as citations/years, not study data. The previous classes
# "[$$($$]" / "[$$)$$]" only matched literal '$'/'('/')' characters and
# never matched an opening '[' or closing ']' as intended.
exclude_brackets_regex = re.compile(r"[\[(]\s*[\d,/-]+\s*[\])]")
# Calendar dates: 12/05/2020, 12-05-2020, or "January 5, 2020".
date_regex = re.compile(r"\b(?:\d{1,2}/\d{1,2}/\d{2,4}|\d{1,2}-\d{1,2}-\d{2,4}|\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\b \d{1,2}, \d{4})\b", re.IGNORECASE)
# Lines consisting only of whitespace-separated integers (table rows).
table_regex = re.compile(r"^(?:\s*\d+\s+)+$")
86
+
87
# Build whole-word, case-insensitive regexes from the keyword lists above.
def _keyword_alternation(group):
    """Return a '\\b(?:kw1|kw2|...)\\b' pattern for one keyword group."""
    return rf'\b(?:{"|".join(map(re.escape, keywords[group]))})\b'

gender_regex = re.compile(_keyword_alternation("Gender"), re.IGNORECASE)
age_regex = re.compile(_keyword_alternation("Age"), re.IGNORECASE)
outcomes_regex = re.compile(_keyword_alternation("Outcomes"), re.IGNORECASE)
assessment_tools_regex = re.compile(_keyword_alternation("Assessment Tools"), re.IGNORECASE)

# Time duration: a digit or spelled-out count followed by a Follow-Up unit,
# optionally pluralised, hyphenated, or extended as a range (" to N").
follow_up = re.compile(
    rf'\b(?:\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s*(?:{"|".join(map(re.escape, keywords["Follow-Up"]))})(?:\b|s\b|-| to \d+)\b',
    re.IGNORECASE
)

# Section headers used to locate the top-of-page author region and other
# key parts of a paper.
key_sections = [
    "Summary", "Overview", "Synopsis", "Results", "Findings", "Observations", "Conclusion",
    "Assessment", "Evaluation", "Outcomes", "Measurements", "Test Results", "Analysis",
    "Abstract", "A B S T R A C T", "Background"
]
main.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prac.py
2
+
3
+ import gradio as gr
4
+ import re
5
+ import fitz # PyMuPDF
6
+ import spacy
7
+ from config import (
8
+ keywords,
9
+ numeric_regex,
10
+ exclude_brackets_regex,
11
+ date_regex,
12
+ table_regex,
13
+ gender_regex,
14
+ age_regex,
15
+ author_pattern,
16
+ exclude_words,
17
+ key_sections,
18
+ follow_up
19
+ )
20
+
21
+ # Load spaCy's English model
22
+ nlp = spacy.load("en_core_web_sm")
23
+
24
+ def normalize_text(text):
25
+ """Normalize text by removing extra whitespace."""
26
+ text = re.sub(r'([a-zA-Z0-9])([\),.!?;-]+)([a-zA-Z])', r'\1\2 \3', text ) # Space between delimmiter and letter
27
+ text = re.sub(r'([a-z])([\.])([\s]*)([a-z])', r'\1 \3\4', text) # Reomove '.' between two lowercase letters e.g., et al. xxx
28
+ text = re.sub(r'([0-9]+)([\.]+)([0-9]+)([\.]+)([0-9]+)', r'\1-\3-\5', text) # Reomove '.' between three decimal numbers e.g., et 000.55.66
29
+ text = re.sub(r'([a-z])([\.]*)([0-9])', r'\1\2 \3', text) # Space between letter and no.
30
+ text = re.sub(r'(\s)([a-z0-9]+)([A-Z])([\w]+)', r'\1\2. \3\4', text) # Put a '.' after a lowercase letter/number followed by Uppercase e.g., drains removed by day threeHe continued to
31
+ text = re.sub(r'([a-z0-9])([\n]+)([A-Z])', r'\1\. \3', text) # Put a between lowercase letter/number, \n and uppercase letter e.g., xxx5 \n Yyy
32
+ text = re.sub(r'(\.)([\s]*)([\.]+)', r'\1', text) # Removing extra '.'s, if any
33
+ text = re.sub(r'([a-zA-Z0-9])([\s]*)([-])([\s]*)([a-zA-Z0-9])', r'\1\3\5', text) # Replace words like trans - anethole with trans-anethole
34
+ # return text
35
+ return " ".join(line.strip() for line in text.splitlines())
36
+
37
def extract_sentences(text):
    """Split *text* into sentences at '.', '?' or '!' followed by whitespace,
    skipping dotted abbreviations (e.g. "e.g.") and honorific-style tokens
    (e.g. "Dr.") via the negative lookbehinds."""
    sentence_boundary = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|!)\s')
    return sentence_boundary.split(text)
40
+
41
def contains_valid_numeric(sentence):
    """Return True when the sentence carries at least one numeric value
    that is not merely a bracketed number (citation/year)."""
    numbers = numeric_regex.findall(sentence)
    if not numbers:
        return False
    bracketed = exclude_brackets_regex.findall(sentence)
    # If every number found is bracketed, treat the sentence as data-free.
    return len(numbers) != len(bracketed)
46
+
47
def matches_criteria(sentence, check_time_duration=False):
    """Decide whether *sentence* should be extracted.

    Date lines and table-like numeric rows are rejected outright. When
    *check_time_duration* is True, only the "number + time unit" test is
    applied. Otherwise the sentence must contain a valid (non-bracketed)
    numeric value AND at least one demographic/study keyword signal.
    """
    if date_regex.search(sentence) or table_regex.match(sentence):
        return False

    sentence_lower = sentence.lower()

    # Gender: whole-word match only.
    contains_gender = bool(gender_regex.search(sentence))

    # Age: number (or en-dash range) followed by an age-related term.
    contains_age_and_numeric = bool(re.search(
        r"\b(?:\d{1,3}(?:–\d{1,3})?)\s*(?:years?|year-old|year olds?|aged\b|ages\b)\b",
        sentence, re.IGNORECASE
    ))

    # Patients / participants: count followed by the population noun.
    contains_patients_and_numeric = bool(re.search(
        r"\b(\d+)\s*(?:patient|patients|case|cases|subject|subjects)\b",
        sentence, re.IGNORECASE
    ))
    contains_participants_and_numeric = bool(re.search(
        r"\b(\d+)\s*(?:participant|participants|attendee|respondent|volunteer)\b",
        sentence, re.IGNORECASE
    ))

    # Inclusion / exclusion criteria mentioned together with a count.
    contains_inclusion_and_numeric = bool(re.search(
        r"\b(\d+)\s*(?:inclusion|eligibility criteria|study inclusion)\b",
        sentence, re.IGNORECASE
    ))
    contains_exclusion_and_numeric = bool(re.search(
        r"\b(\d+)\s*(?:exclusion|study exclusion|not eligible)\b",
        sentence, re.IGNORECASE
    ))

    # Keyword-list substring checks. BUGFIX: the keyword lists mix cases
    # ("Case Report", "South Africa"); the previous code compared the raw
    # keyword against sentence.lower(), so any capitalised keyword could
    # never match. Lowercase both sides.
    def _contains_any(group):
        return any(kw.lower() in sentence_lower for kw in keywords[group])

    contains_comorbidities = _contains_any("Co-morbidities")
    contains_remark = _contains_any("Remark")
    contains_intervention = _contains_any("Intervention Groups")
    contains_study_type = _contains_any("Study Types")
    contains_country = _contains_any("Country")
    # (A Race/Ethnicity check existed previously but was never used in the
    # final decision; it has been removed as dead code.)

    # Time duration: digit or spelled-out count plus a time unit.
    time_duration_regex = re.compile(
        r'\b(?:\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s*'
        r"(?:years|year|weeks|week|months|month|days|day)\b",
        re.IGNORECASE
    )
    contains_time_duration = bool(time_duration_regex.search(sentence))

    if check_time_duration:
        return contains_time_duration

    return (
        contains_valid_numeric(sentence) and (
            contains_gender
            or contains_age_and_numeric
            or contains_patients_and_numeric
            or contains_participants_and_numeric
            or contains_inclusion_and_numeric
            or contains_exclusion_and_numeric
            or contains_comorbidities
            or contains_time_duration
            or contains_remark
            or contains_intervention
            or contains_study_type
            or contains_country
        )
    )
131
+
132
def matches_keyword(sentence, user_keywords):
    """Return True when *sentence* contains any user-specified keyword
    (case-insensitive substring match)."""
    haystack = sentence.lower()
    for keyword in user_keywords:
        if keyword.lower() in haystack:
            return True
    return False
135
+
136
def extract_authors(page):
    """Extract candidate author names from the region of a PDF page that
    precedes the first key section header (Abstract/Results/...).

    Combines regex candidates (author_pattern) with spaCy PERSON entities,
    then filters out stop-words/months/etc. from exclude_words.
    Returns a deduplicated list of names (order not guaranteed).
    """
    full_text = page.get_text()

    # Locate every known section header; drop headers not present.
    section_positions = {section: full_text.find(section) for section in key_sections}
    section_positions = {k: v for k, v in section_positions.items() if v != -1}

    # Author lists appear above the abstract/body, so only search the text
    # before the earliest header found (or the whole page if none found).
    if section_positions:
        cutoff_position = min(section_positions.values())
        text_to_search = full_text[:cutoff_position]
    else:
        text_to_search = full_text

    # Regex-based candidates.
    author_matches = re.findall(author_pattern, text_to_search)

    # NER-based candidates.
    doc = nlp(text_to_search)
    nlp_names = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]

    # Merge and filter. BUGFIX: compare case-insensitively on BOTH sides —
    # the previous code lowercased the candidate but not exclude_words, so
    # capitalised entries such as "University" were never filtered out.
    excluded = {word.lower() for word in exclude_words}
    combined_names = set(author_matches + nlp_names)
    return [name for name in combined_names if name.lower() not in excluded]
165
+
166
def highlight_keywords(sentence, user_keywords):
    """Wrap every occurrence of the user keywords in <mark> tags.

    Single-word keywords are matched on word boundaries; multi-word
    phrases match anywhere. Matching is case-insensitive; the matched
    text keeps its original casing inside the tags.
    """
    if not user_keywords:
        return sentence

    # Escape and partition keywords into single words vs. phrases.
    word_alternatives = [re.escape(kw) for kw in user_keywords if ' ' not in kw]
    phrase_alternatives = [re.escape(kw) for kw in user_keywords if ' ' in kw]

    pattern_parts = []
    if word_alternatives:
        pattern_parts.append(r'\b(?:' + '|'.join(word_alternatives) + r')\b')
    if phrase_alternatives:
        pattern_parts.append(r'(?:' + '|'.join(phrase_alternatives) + r')')

    if not pattern_parts:
        return sentence

    highlighter = re.compile('|'.join(pattern_parts), re.IGNORECASE)
    return highlighter.sub(lambda m: f"<mark>{m.group(0)}</mark>", sentence)
201
+
202
def process_file(file_path, user_keywords, check_time_duration=False):
    """
    Process a PDF file: extract authors from the first page, collect
    sentences matching the extraction criteria from every page, filter
    them by the user keywords (skipped in time-duration mode), and
    highlight the keywords in the survivors.

    Returns (highlighted_sentences, authors_str).
    """
    doc = fitz.open(file_path)
    try:
        authors_str = ', '.join(extract_authors(doc[0]))

        all_extracted_sentences = []
        for page in doc:
            text = normalize_text(page.get_text())
            sentences = extract_sentences(text)
            all_extracted_sentences.extend(
                sentence.strip()
                for sentence in sentences
                if matches_criteria(sentence, check_time_duration)
            )
    finally:
        # Always close the document — the original only closed on the
        # success path, leaking the file handle when extraction raised.
        doc.close()

    if check_time_duration:
        filtered_sentences = all_extracted_sentences
    else:
        filtered_sentences = [s for s in all_extracted_sentences if matches_keyword(s, user_keywords)]

    highlighted_sentences = [highlight_keywords(s, user_keywords) for s in filtered_sentences]
    return highlighted_sentences, authors_str
230
+
231
def process_text(input_text, user_keywords, check_time_duration=False):
    """
    Process raw input text: extract criteria-matching sentences, filter
    them by the user keywords (skipped in time-duration mode), and
    highlight the keywords. Author extraction is unavailable for plain
    text, so a fixed message is returned in its place.
    """
    candidate_sentences = [
        sentence.strip()
        for sentence in extract_sentences(normalize_text(input_text))
        if matches_criteria(sentence, check_time_duration)
    ]

    if check_time_duration:
        kept = candidate_sentences
    else:
        kept = [s for s in candidate_sentences if matches_keyword(s, user_keywords)]

    highlighted = [highlight_keywords(s, user_keywords) for s in kept]
    return highlighted, "Authors not extracted from text input."
249
+
250
def handle_input(file_path=None, input_text=None, keyword_group=None, custom_keywords=None, time_duration=False):
    """
    Gradio callback: resolve the keyword list, process the uploaded file
    or pasted text, and return an HTML string with the authors and the
    highlighted sentences (or a plain status message).
    """
    # BUGFIX: copy the preset list. The original code took the list object
    # straight out of the shared `keywords` config dict and .extend()ed it,
    # permanently polluting the config with custom keywords across requests.
    user_keywords = list(keywords.get(keyword_group, [])) if keyword_group else []
    if custom_keywords:
        user_keywords.extend(kw.strip() for kw in custom_keywords.split(",") if kw.strip())

    if not user_keywords and not time_duration:
        return "No keyword provided."

    if file_path:
        extracted_sentences, authors_str = process_file(file_path, user_keywords, time_duration)
    elif input_text:
        extracted_sentences, authors_str = process_text(input_text, user_keywords, time_duration)
    else:
        return "No input provided."

    if not extracted_sentences:
        return "No matching sentences found."

    # Combine authors and highlighted sentences into HTML.
    parts = [f"<p><b>Authors:</b> {authors_str}</p>"]
    parts.extend(f"<p>{sentence}</p>" for sentence in extracted_sentences)
    return "".join(parts)
280
+
281
# Gradio Interface
# Wires handle_input to a web UI: a PDF/text file upload, a free-text box,
# a preset keyword-group selector (the keys of the config `keywords` dict),
# and a comma-separated custom-keyword box. Output is rendered as HTML so
# the <mark> highlighting from highlight_keywords() is visible.
iface = gr.Interface(
    fn=handle_input,
    inputs=[
        gr.File(label="Upload PDF or Text File", type="filepath"),
        gr.Textbox(label="Enter Text", placeholder="Type or paste text here..."),
        gr.Radio(
            choices=list(keywords.keys()),
            label="Information related to..."
        ),
        gr.Textbox(
            label="Enter Custom Keywords",
            placeholder="e.g., migraine, headache"
        ),
        # The time-duration checkbox is disabled; handle_input therefore
        # always receives time_duration=False from the UI.
        # gr.Checkbox(
        #     label="Check Time Duration Criteria",
        #     value=False
        # )
    ],
    outputs=gr.HTML(label="Processed Output"),
    title="BioMedical Information Extraction",
    description="""
    <div style='text-align: left;'>
    Made by: Sumit Kumar (2311006), Ramavath Tharun (21219) <br>
    Supervisor: Dr. Tanmay Basu<br>
    Indian Institute of Science Education and Research<br>
    </div>
    <div style='text-align: center; margin-top: 10px;'>
    <b>Upload a PDF file or enter text, then select a keyword group or enter custom keywords to extract and highlight relevant sentences.</b>
    </div>
    """,
    examples=None,  # You can add example files or texts if desired
    allow_flagging="never",
    # NOTE(review): cache_examples=True with examples=None looks
    # inconsistent — confirm against the installed Gradio version.
    cache_examples=True,
    # Add custom CSS to style the <mark> tag if necessary
    css="""
    mark {
        background-color: blue;
        padding: 0;
        border-radius: 2px;
    }
    /* Optional: Adjust paragraph spacing */
    p {
        margin-bottom: 10px;
    }
    """
)

# share=True also creates a public Gradio share link.
iface.launch(share=True)
330
+
331
+
332
+