rodrigocarrillo committed on
Commit
44b5e1e
·
verified ·
1 Parent(s): dee3475

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +680 -0
app.py ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import json
4
+ import time
5
+ from pymed import PubMed
6
+ from copy import deepcopy
7
+ from IPython.display import Markdown, display
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ import gradio as gr
10
+ import json
11
+ from typing import Tuple
12
+
13
+
14
+
15
+
16
+
17
# System prompt for the title-extraction step. It asks the model to answer
# with nothing but a JSON array of objects that each carry a "title" field,
# leaving out non-paper references such as websites or books.
SYSTEM_PROMPT_GET_TITLES_FROM_LIST_REFERENCES = """
You are a helpful assistant that extracts information from scientific paper references.
Given a list of paper references, identify the titles of the papers in these references.
Omit the non-scientific papers in the list (e.g., websites or books)
Return your response as a JSON array of objects with the following fields:
- title: The title of the paper.
Ensure the JSON is properly formatted.
Do not include any text outside the JSON structure.
Do not include any additional text, commentary, or explanation.
"""


# API key read from the environment; this is None when the variable is unset.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Module-level chat model (temperature 0.0) used by list_of_papers().
# NOTE(review): the variable name says "25_pro" but the configured model is
# "gemini-2.5-flash"; the name is left alone because other code refers to it.
llm_Gemini_25_pro = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.0, google_api_key=GOOGLE_API_KEY)
34
+
35
+
36
+
37
+
38
+
39
def _strip_code_fences(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    # Opening fence: optional "json" tag in any case, followed by any
    # whitespace (a newline is no longer required).
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
    # Closing fence, with or without a newline in front of it.
    text = re.sub(r"\s*```\s*$", "", text)
    return text.strip()


def list_of_papers(list_of_papers_to_parse: str, max_retries: int = 3, retry_delay: int = 5) -> str:
    """
    Extract paper titles from a list of references using the module-level LLM.

    Args:
        list_of_papers_to_parse: String containing the list of paper references.
        max_retries: Maximum number of attempts for the LLM call.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The LLM answer as a JSON string (code fences removed, validated with
        ``json.loads``), or a JSON object with an "error" key when the input
        is blank or every attempt failed.
    """
    # Blank input never reaches the LLM.
    if not list_of_papers_to_parse or not list_of_papers_to_parse.strip():
        return json.dumps({"error": "Empty input provided. Please provide a list of references."})

    last_error = None
    for attempt in range(max_retries):
        try:
            response = llm_Gemini_25_pro.invoke([
                {"role": "system", "content": SYSTEM_PROMPT_GET_TITLES_FROM_LIST_REFERENCES},
                {"role": "user", "content": f"List of references:\n{list_of_papers_to_parse}"},
            ])

            if not response or not hasattr(response, "content"):
                raise ValueError("Invalid response from LLM")

            content = _strip_code_fences(response.content)
            if not content:
                raise ValueError("LLM returned empty response")

            # Only hand back text that really parses as JSON.
            try:
                json.loads(content)
            except json.JSONDecodeError as exc:
                raise ValueError(f"LLM returned invalid JSON: {exc}") from exc
            return content

        except Exception as exc:
            # Deliberately broad: any failure of this attempt (provider error,
            # malformed answer) should trigger the next retry.
            last_error = exc
            print(f"Attempt {attempt + 1}/{max_retries} failed: {exc}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)

    if last_error is None:
        # Only reachable when max_retries <= 0, i.e. no attempt was made.
        return json.dumps({"error": "Unexpected error occurred"})

    error_message = {
        "error": "LLM service is currently unavailable",
        "message": "The service failed after multiple attempts. Please try again later.",
        "details": str(last_error),
    }
    return json.dumps(error_message, indent=2)
106
+
107
+
108
+
109
+
110
+
111
def fetch_paper_authors_from_pubmed(papers: list, delay: int=5, max_results: int=1, verbose: bool=True) -> list:
    """
    Fetch authors for each paper from PubMed.

    Args:
        papers: List of dicts that each carry a "title" string.
        delay: Seconds to sleep before every PubMed query.
        max_results: Maximum number of PubMed hits requested per title.
        verbose: Whether to print one progress line per paper.

    Returns:
        One dict per queried paper with keys "paper_title" and "authors".
        "authors" is a list holding the ``authors`` attribute of every hit,
        or the string "No authors found" when the query returned nothing.
        Entries without a usable title are skipped instead of raising.
    """
    # Nothing to look up: do not even build a client.
    if not papers:
        return []

    pubmed = PubMed(tool="MyTool", email="rodrigo@gmail.com")
    all_results = []
    total = len(papers)

    for position, paper in enumerate(papers, start=1):
        # The list comes from LLM output, so an item may lack a title.
        title = paper.get("title") if isinstance(paper, dict) else None
        if not isinstance(title, str) or not title.strip():
            if verbose:
                print(f"Skipping entry {position}/{total}: no usable title")
            continue

        if verbose:
            print(f"Processing paper {position}/{total}: {title}")

        time.sleep(delay)
        results = pubmed.query(title + '[title]', max_results=max_results)

        authors_for_this_paper = [article.authors for article in results]
        if not authors_for_this_paper:
            # Sentinel kept as-is; the e-mail filtering step drops such entries.
            authors_for_this_paper = "No authors found"

        all_results.append({
            "paper_title": title,
            "authors": authors_for_this_paper,
        })

    return all_results
137
+
138
+
139
+
140
+
141
+
142
# Pragmatic pattern for e-mail addresses embedded in free text.
email_regex = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")


def contains_email_symbol(obj) -> bool:
    """Return True if '@' appears anywhere in the nested structure (str/list/tuple/dict).

    This is a cheap pre-check; any other type of object yields False.
    """
    if isinstance(obj, str):
        return "@" in obj
    if isinstance(obj, dict):
        # Only the values are inspected, not the keys.
        return any(contains_email_symbol(value) for value in obj.values())
    if isinstance(obj, (list, tuple)):
        return any(contains_email_symbol(item) for item in obj)
    return False


def extract_emails_from_obj(obj):
    """Return the distinct e-mail addresses found anywhere in *obj*.

    Strings are scanned with ``email_regex``; dict values and list/tuple
    items are searched recursively; every other type contributes nothing.
    Addresses are returned in first-seen order, so the result is stable
    between runs (a plain ``set`` would not guarantee that for strings).
    """
    found = {}  # dict used as an insertion-ordered set
    if isinstance(obj, str):
        found.update(dict.fromkeys(email_regex.findall(obj)))
    elif isinstance(obj, dict):
        for value in obj.values():
            found.update(dict.fromkeys(extract_emails_from_obj(value)))
    elif isinstance(obj, (list, tuple)):
        for item in obj:
            found.update(dict.fromkeys(extract_emails_from_obj(item)))
    return list(found)
171
+
172
+
173
+
174
+
175
+
176
def flatten_one_or_many(authors):
    """Flatten arbitrarily nested lists/tuples into one flat list.

    A value that is neither a list nor a tuple is returned wrapped in a
    single-element list.
    """
    if not isinstance(authors, (list, tuple)):
        return [authors]

    flattened = []
    for element in authors:
        # The recursive call returns [element] for leaves, so extend() covers
        # both the nested and the non-nested case.
        flattened.extend(flatten_one_or_many(element))
    return flattened
188
+
189
+
190
+
191
+
192
+
193
def filter_all_results_keep_only_email_authors(all_results):
    """
    Keep only the papers that have at least one author with an e-mail address.

    Input entries look like {'paper_title': ..., 'authors': ...}. For every
    kept paper the "authors" value is replaced by the authors that contribute
    at least one address not already seen for that paper. Dict authors are
    copied and gain a "_found_emails" list; any other author value is wrapped
    as {"raw": value, "_found_emails": [...]}. The input is not modified.
    """
    kept = []

    for record in all_results:
        already_listed = set()  # addresses already attributed within this paper
        contactable = []

        for person in flatten_one_or_many(record.get("authors", [])):
            if not contains_email_symbol(person):
                continue

            fresh = [
                address
                for address in extract_emails_from_obj(person)
                if address not in already_listed
            ]
            if not fresh:
                continue
            already_listed.update(fresh)

            if isinstance(person, dict):
                annotated = deepcopy(person)
                annotated["_found_emails"] = fresh
            else:
                annotated = {"raw": person, "_found_emails": fresh}
            contactable.append(annotated)

        if not contactable:
            continue

        trimmed = deepcopy(record)
        trimmed["authors"] = contactable
        kept.append(trimmed)

    return kept
230
+
231
+
232
+
233
+
234
+
235
def display_markdown_filtered_results(filtered_results):
    """Display authors with emails nicely formatted (full name, affiliation, emails).

    Args:
        filtered_results: List of paper dicts whose "authors" entries are
            dicts (or lists of dicts) carrying a "_found_emails" list.

    Authors without "_found_emails" are left out. Name and affiliation
    fields that are missing or None are rendered as empty text instead of
    the word "None" or an AttributeError. The assembled Markdown is shown
    through IPython's ``display``.
    """
    chunks = []

    for paper in filtered_results:
        for entry in paper.get("authors", []):
            # An entry may itself be a list of authors; flatten one level.
            group = entry if isinstance(entry, list) else [entry]
            for author in group:
                if not isinstance(author, dict):
                    continue
                emails = author.get("_found_emails") or []
                if not emails:
                    continue

                # `or ''` also covers keys that exist with value None.
                full_name = f"{author.get('firstname') or ''} {author.get('lastname') or ''}".strip()
                affiliation = (author.get("affiliation") or "").strip()

                # Sub-bullets are indented two spaces so that they nest under
                # the author bullet when the Markdown is rendered.
                chunks.append(
                    f"- **Author:** {full_name}\n"
                    f"  - **Affiliation:** {affiliation}\n"
                    f"  - **Email(s):** {', '.join(emails)}\n\n"
                )

    display(Markdown("".join(chunks)))
269
+
270
+
271
+
272
+
273
+
274
+
275
class PaperReviewerExtractor:
    """
    Main class to extract reviewer emails from a list of paper references.

    This class orchestrates the entire workflow:
    1. Extract paper titles from a reference list using LLM
    2. Fetch author information from PubMed
    3. Filter authors that have email addresses
    4. Return structured results
    """

    def __init__(self, google_api_key=None, model="gemini-2.5-flash"):
        """
        Create the LLM client used for title extraction.

        Args:
            google_api_key: API key for the Gemini client. When omitted, the
                GOOGLE_API_KEY environment variable is used.
            model: Name of the Gemini model to use.
        """
        # Fix: the key used to be the literal text 'GOOGLE_API_KEY' rather
        # than the value of the environment variable of that name.
        self.google_api_key = google_api_key or os.getenv("GOOGLE_API_KEY")
        self.llm = ChatGoogleGenerativeAI(
            model=model,
            temperature=0.0,
            google_api_key=self.google_api_key,
        )

    @staticmethod
    def _error_result(message: str) -> dict:
        """Build the result dict that run() returns for a failed pipeline."""
        return {
            'status': 'error',
            'papers': [],
            'authors_with_emails': [],
            'raw_authors': [],
            'error': message,
        }

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return *text* without a surrounding Markdown code fence, if any."""
        text = text.strip()
        if not text.startswith("```"):
            return text
        # Optional "json" tag in any case; a newline after it is not required.
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```\s*$", "", text)
        return text.strip()

    def run(
        self,
        reference_list: str,
        pubmed_delay: int = 5,
        pubmed_max_results: int = 1,
        llm_max_retries: int = 3,
        llm_retry_delay: int = 5,
        verbose: bool = True
    ) -> dict:
        """
        Execute the complete pipeline to extract reviewer emails from references.

        Args:
            reference_list: String containing paper references
            pubmed_delay: Delay in seconds between PubMed API calls
            pubmed_max_results: Maximum results per PubMed query
            llm_max_retries: Maximum retry attempts for LLM calls
            llm_retry_delay: Delay in seconds between LLM retries
            verbose: Whether to print progress messages

        Returns:
            Dictionary with keys:
            - 'status': 'success' or 'error'
            - 'papers': List of paper titles extracted
            - 'authors_with_emails': Filtered authors with email addresses
            - 'raw_authors': All authors found before email filtering
            - 'error': Error message if status is 'error', otherwise None
        """
        try:
            if verbose:
                print("Step 1: Extracting paper titles from references...")

            # Step 1: Extract paper titles using LLM
            papers_json = self._extract_paper_titles(
                reference_list,
                max_retries=llm_max_retries,
                retry_delay=llm_retry_delay
            )
            papers = json.loads(papers_json)

            # The extraction step reports failures as a dict with an "error" key.
            if isinstance(papers, dict) and "error" in papers:
                message = papers.get('message', papers.get('error', 'Failed to extract paper titles'))
                details = papers.get('details')
                if details:
                    # Surface the underlying cause (e.g. a rejected API key).
                    message = f"{message} ({details})"
                return self._error_result(message)

            # Ensure papers is a list
            if not isinstance(papers, list):
                return self._error_result(
                    f'Expected list of papers, got {type(papers).__name__}'
                )

            if verbose:
                print(f"✓ Found {len(papers)} papers\n")
                print("Step 2: Fetching authors from PubMed...")

            # Step 2: Fetch authors from PubMed
            all_authors = self._fetch_authors_from_pubmed(
                papers,
                delay=pubmed_delay,
                max_results=pubmed_max_results,
                verbose=verbose
            )

            if verbose:
                print(f"✓ Fetched authors for {len(all_authors)} papers\n")
                print("Step 3: Filtering authors with email addresses...")

            # Step 3: Filter authors with emails using the module-level helper
            authors_with_emails = filter_all_results_keep_only_email_authors(all_authors)

            if verbose:
                print(f"✓ Found {len(authors_with_emails)} papers with authors having email addresses\n")
                print("Pipeline complete!")

            return {
                'status': 'success',
                'papers': papers,
                'authors_with_emails': authors_with_emails,
                'raw_authors': all_authors,
                'error': None
            }

        except Exception as e:
            # Top-level boundary: callers always get a result dict, never an
            # exception.
            error_msg = f"Pipeline error: {str(e)}"
            if verbose:
                print(f"✗ {error_msg}")
            return self._error_result(error_msg)

    def _extract_paper_titles(
        self,
        reference_list: str,
        max_retries: int = 3,
        retry_delay: int = 5
    ) -> str:
        """
        Extract paper titles using the LLM.

        Returns the LLM answer as a JSON string (code fences removed and
        validated with ``json.loads``), or a JSON object with an "error" key
        when the input is blank or all attempts failed.
        """
        if not reference_list or not reference_list.strip():
            return json.dumps({"error": "Empty input provided."})

        last_error = None
        for attempt in range(max_retries):
            try:
                response = self.llm.invoke([
                    {"role": "system", "content": SYSTEM_PROMPT_GET_TITLES_FROM_LIST_REFERENCES},
                    {"role": "user", "content": f"List of references:\n{reference_list}"}
                ])

                if not response or not hasattr(response, 'content'):
                    raise ValueError("Invalid response from LLM")

                content = self._strip_code_fences(response.content)
                if not content:
                    raise ValueError("LLM returned empty response")

                # Validate JSON before handing the text back.
                try:
                    json.loads(content)
                except json.JSONDecodeError as exc:
                    raise ValueError(f"Invalid JSON from LLM: {exc}") from exc
                return content

            except Exception as exc:
                # Deliberately broad: any failure of this attempt triggers the
                # next retry.
                last_error = exc
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)

        if last_error is None:
            # Only reachable when max_retries <= 0, i.e. no attempt was made.
            return json.dumps({"error": "Unexpected error"})

        return json.dumps({
            "error": "LLM service unavailable",
            "details": str(last_error)
        })

    def _fetch_authors_from_pubmed(
        self,
        papers: list,
        delay: int = 5,
        max_results: int = 1,
        verbose: bool = True
    ) -> list:
        """
        Fetch authors for each paper from PubMed.

        Returns one dict per queried paper with keys "paper_title" and
        "authors"; "authors" is the string "No authors found" when the query
        returned nothing. Entries without a usable "title" are skipped.
        """
        if not papers:
            return []

        pubmed = PubMed(tool="MyTool", email="rodrigo@gmail.com")
        all_results = []
        total = len(papers)

        for position, paper in enumerate(papers, start=1):
            # Items come from LLM output, so a title may be missing.
            title = paper.get("title") if isinstance(paper, dict) else None
            if not isinstance(title, str) or not title.strip():
                if verbose:
                    print(f" Skipping entry {position}/{total}: no usable title")
                continue

            if verbose:
                print(f" Processing paper {position}/{total}: {title}")

            time.sleep(delay)
            results = pubmed.query(title + '[title]', max_results=max_results)

            authors_for_this_paper = [article.authors for article in results]
            if not authors_for_this_paper:
                authors_for_this_paper = "No authors found"

            all_results.append({
                "paper_title": title,
                "authors": authors_for_this_paper
            })

        return all_results
475
+
476
+
477
+
478
+
479
+
480
# Module-level pipeline instance; process_references() calls its run() method.
# It is created at import time, so the LLM client is constructed as soon as
# this module is loaded.
extractor = PaperReviewerExtractor()
482
+
483
def format_authors_as_markdown(filtered_results):
    """Format authors that have e-mail addresses as Markdown, grouped by paper.

    Args:
        filtered_results: List of paper dicts with "paper_title" and an
            "authors" list whose entries are dicts (or lists of dicts)
            carrying a "_found_emails" list.

    Returns:
        A Markdown string with one "## <title>" section per paper that has at
        least one author with e-mails, or the text
        "No authors with email addresses found." when there is none. Name and
        affiliation fields that are missing or None are rendered as empty
        text.
    """
    sections = []

    for paper in filtered_results:
        bullets = []
        for entry in paper.get("authors", []):
            # An entry may itself be a list of authors; flatten one level.
            group = entry if isinstance(entry, list) else [entry]
            for author in group:
                if not isinstance(author, dict):
                    continue
                emails = author.get("_found_emails") or []
                if not emails:
                    continue

                # `or ''` also covers keys that exist with value None.
                full_name = f"{author.get('firstname') or ''} {author.get('lastname') or ''}".strip()
                affiliation = (author.get("affiliation") or "").strip()

                # Sub-bullets are indented two spaces so that they nest under
                # the author bullet when the Markdown is rendered.
                bullets.append(
                    f"- **Author:** {full_name}\n"
                    f"  - **Affiliation:** {affiliation}\n"
                    f"  - **Email(s):** {', '.join(emails)}\n\n"
                )

        if bullets:
            paper_title = paper.get("paper_title", "Unknown Paper")
            sections.append(f"## {paper_title}\n\n" + "".join(bullets))

    md_text = "".join(sections)
    return md_text if md_text else "No authors with email addresses found."
518
+
519
def process_references(
    reference_list: str,
    pubmed_delay: int,
    pubmed_max_results: int,
    llm_max_retries: int,
    llm_retry_delay: int
) -> Tuple[str, str, str]:
    """
    Process references and return results in displayable format.

    Args:
        reference_list: The pasted text of paper references
        pubmed_delay: Delay between PubMed API calls
        pubmed_max_results: Max results per PubMed query
        llm_max_retries: Max LLM retry attempts
        llm_retry_delay: Delay between LLM retries

    Returns:
        Tuple of (papers_json, authors_markdown, status_message). On failure
        the first two items are empty strings and the third carries the error.
    """

    # Run the pipeline. The two count parameters are coerced to int because
    # UI sliders may hand over floats, and they end up in range() and in the
    # PubMed result limit.
    result = extractor.run(
        reference_list=reference_list,
        pubmed_delay=pubmed_delay,
        pubmed_max_results=int(pubmed_max_results),
        llm_max_retries=int(llm_max_retries),
        llm_retry_delay=llm_retry_delay,
        verbose=True
    )

    if result['status'] == 'error':
        error_msg = f"❌ Error: {result['error']}"
        return "", "", error_msg

    # Format papers output as JSON
    papers_display = json.dumps(result['papers'], indent=2)

    # Format authors with emails as nice markdown (not JSON)
    papers_with_contacts = result['authors_with_emails']
    authors_display = format_authors_as_markdown(papers_with_contacts)

    # 'authors_with_emails' holds one entry per paper, so the number of
    # authors is the sum of the per-paper author lists, not its length.
    author_count = sum(len(paper.get("authors", [])) for paper in papers_with_contacts)

    # Create status message
    status_msg = f"""
✅ **Pipeline Completed Successfully!**

📊 Summary:
- Papers found: {len(result['papers'])}
- Papers with contactable authors: {len(papers_with_contacts)}
- Authors with emails: {author_count}
"""

    return papers_display, authors_display, status_msg
570
+
571
+
572
# Create Gradio interface: a two-column layout with the reference text and
# the tuning sliders on the left and the three result widgets on the right.
with gr.Blocks(title="Paper Reviewer Email Extractor") as demo:

    # NOTE(review): the instructions text below is still a placeholder.
    gr.Markdown("""
# 📚 Paper Reviewer Email Extractor

> **Instructions & Rationale:**
>
> [PLACEHOLDER: Add your instructions and rationale here. Explain the purpose of this tool, how to use it, and the scientific/research justification for the work.]

""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Input Configuration")

            # Reference list input
            reference_input = gr.Textbox(
                label="Paper References",
                placeholder="Paste your list of paper references here...",
                lines=10,
                info="Provide a list of scientific paper references in any format"
            )

            # PubMed settings; defaults match PaperReviewerExtractor.run().
            with gr.Row():
                pubmed_delay = gr.Slider(
                    minimum=1,
                    maximum=30,
                    value=5,
                    step=1,
                    label="PubMed Delay (seconds)",
                    info="Delay between PubMed API calls"
                )

                pubmed_max_results = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=1,
                    step=1,
                    label="PubMed Max Results",
                    info="Maximum results per PubMed query"
                )

            # LLM retry settings; defaults match PaperReviewerExtractor.run().
            with gr.Row():
                llm_max_retries = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=3,
                    step=1,
                    label="LLM Max Retries",
                    info="Maximum retry attempts for LLM calls"
                )

                llm_retry_delay = gr.Slider(
                    minimum=1,
                    maximum=30,
                    value=5,
                    step=1,
                    label="LLM Retry Delay (seconds)",
                    info="Delay between LLM retries"
                )

            submit_btn = gr.Button(
                "🚀 Extract Reviewers",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### Outputs")

            # NOTE(review): the status message produced by process_references()
            # contains Markdown markers (**...**); presumably a Textbox shows
            # them literally - confirm whether a Markdown widget is intended.
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
                lines=5
            )

            papers_output = gr.Textbox(
                label="Extracted Papers",
                interactive=False,
                lines=10,
                max_lines=15
            )

            authors_output = gr.Markdown(
                label="Authors with Email Addresses",
                value="Results will appear here..."
            )

    # Connect button click to processing function. The order of `inputs`
    # follows the parameter order of process_references(); the order of
    # `outputs` follows its (papers, authors, status) return tuple.
    submit_btn.click(
        fn=process_references,
        inputs=[
            reference_input,
            pubmed_delay,
            pubmed_max_results,
            llm_max_retries,
            llm_retry_delay
        ],
        outputs=[
            papers_output,
            authors_output,
            status_output
        ]
    )

# Launch the interface only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)