Spaces:
Sleeping
Sleeping
File size: 9,589 Bytes
9a4a0bb 9c12608 9a4a0bb 9c12608 9a4a0bb 9c12608 9a4a0bb 6760933 9a4a0bb 9c12608 9a4a0bb 6760933 9a4a0bb 6760933 9a4a0bb 6760933 9a4a0bb 6760933 9a4a0bb 6760933 9a4a0bb 6760933 9a4a0bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
"""Gradio app for AI-powered literature review system with Semantic Scholar integration."""
import gradio as gr
import os
from typing import Optional, List, Dict
from markitdown import MarkItDown
from agents import MultiReviewerSystem
import requests
import time
from dotenv import load_dotenv
# Load environment variables (OPENAI_API_KEY, OPENAI_BASE_URL, MODEL_NAME,
# SEMANTIC_SCHOLAR_API_KEY) from a local .env file, if present.
load_dotenv()
def extract_text_from_pdf(pdf_file) -> str:
    """Extract text content from a PDF file using markitdown.

    Args:
        pdf_file: The uploaded file. With ``gr.File(type="filepath")`` this
            is a plain path string; older/other configurations may pass a
            file-like wrapper exposing ``.name``. ``None`` is tolerated.

    Returns:
        The extracted text, an empty string for ``None`` input, or a string
        starting with "Error" on failure (callers check that prefix instead
        of catching exceptions).
    """
    try:
        if pdf_file is None:
            return ""
        # Bug fix: type="filepath" yields a str, which has no .name attribute;
        # accept both a path string and a file-like wrapper.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        md = MarkItDown()
        result = md.convert(path)
        return result.text_content
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
def search_semantic_scholar(query: str, limit: int = 5, api_key: Optional[str] = None) -> List[Dict]:
    """Search for related papers on Semantic Scholar with rate limiting.

    Args:
        query: Free-text search query (typically the paper title).
        limit: Maximum number of results to request.
        api_key: Optional Semantic Scholar API key, sent as ``x-api-key``.

    Returns:
        The list of paper dicts from the API response's ``data`` field, or
        an empty list on any error (errors are printed, never raised).
    """
    try:
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": limit,
            "fields": "title,authors,year,abstract,citationCount,url,openAccessPdf"
        }
        headers = {}
        if api_key:
            headers["x-api-key"] = api_key
        # Rate limiting: stay under the public API's ~1 request/second limit.
        time.sleep(1)
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        return data.get("data", [])
    except Exception as e:
        # Best-effort: the caller treats "no results" and "error" the same.
        print(f"Error searching Semantic Scholar: {e}")
        return []
def format_semantic_scholar_results(papers: List[Dict]) -> str:
    """Format Semantic Scholar results as a Markdown section.

    Args:
        papers: Paper dicts as returned by the Semantic Scholar Graph API.

    Returns:
        A Markdown string listing each paper, or a placeholder message when
        the list is empty.

    Note:
        The API returns explicit ``null`` values for fields like ``abstract``
        and ``openAccessPdf``, so ``.get(key, default)`` alone is not enough —
        the key exists with value ``None``. Each field is therefore guarded
        with ``or`` to avoid TypeError/AttributeError on null fields.
    """
    if not papers:
        return "No related papers found."
    formatted = "## π Related Papers from Semantic Scholar\n\n"
    for i, paper in enumerate(papers, 1):
        title = paper.get("title") or "N/A"
        authors = ", ".join(a.get("name", "") for a in paper.get("authors") or [])
        year = paper.get("year") or "N/A"
        citations = paper.get("citationCount") or 0
        abstract = paper.get("abstract") or "No abstract available"
        url = paper.get("url") or ""
        pdf_url = paper.get("openAccessPdf") or {}
        formatted += f"### {i}. {title}\n\n"
        formatted += f"**Authors**: {authors}\n\n"
        formatted += f"**Year**: {year} | **Citations**: {citations}\n\n"
        # Truncate long abstracts to 300 chars with an ellipsis marker.
        formatted += f"**Abstract**: {abstract[:300]}{'...' if len(abstract) > 300 else ''}\n\n"
        if url:
            formatted += f"[View on Semantic Scholar]({url})"
        if pdf_url.get("url"):
            formatted += f" | [Download PDF]({pdf_url['url']})"
        formatted += "\n\n---\n\n"
    return formatted
def extract_paper_title_from_text(text: str) -> str:
    """Return the first plausible title line from the start of *text*.

    Scans the first 20 lines for one whose stripped length is title-like
    (between 21 and 199 characters). Falls back to "Research Paper" when
    no candidate is found.
    """
    head = text.split('\n')[:20]
    for raw_line in head:
        candidate = raw_line.strip()
        if 20 < len(candidate) < 200:
            return candidate
    return "Research Paper"
def review_paper(
    pdf_file,
    search_related: bool,
    progress=gr.Progress()
) -> tuple[str, str, str, str, str]:
    """Process an uploaded PDF and generate multi-agent reviews.

    Args:
        pdf_file: The uploaded PDF (Gradio File value) or ``None``.
        search_related: Whether to query Semantic Scholar for related work.
        progress: Gradio progress tracker, injected by the UI.

    Returns:
        A 5-tuple of Markdown strings: (summary, review_1, review_2,
        review_3, related_papers). On error, the summary slot carries the
        error message and the review slots are empty.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", "", "", "", ""
    # API credentials come from environment variables (loaded via .env).
    final_api_key = os.getenv("OPENAI_API_KEY", "")
    final_base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    final_model = os.getenv("MODEL_NAME", "gpt-3.5-turbo")
    if not final_api_key.strip():
        return "Please provide an API key or set OPENAI_API_KEY environment variable.", "", "", "", ""
    # Extract text from the PDF; extract_text_from_pdf signals failure via
    # an "Error..." prefix rather than an exception.
    progress(0.1, desc="Extracting text from PDF...")
    paper_text = extract_text_from_pdf(pdf_file)
    if paper_text.startswith("Error"):
        return paper_text, "", "", "", ""
    if not paper_text.strip():
        return "Could not extract text from PDF. The file might be empty or image-based.", "", "", "", ""
    # Optionally look up related papers before the (slow) review step.
    related_papers_md = ""
    if search_related:
        progress(0.2, desc="Searching for related papers...")
        paper_title = extract_paper_title_from_text(paper_text)
        semantic_scholar_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY", "")
        related_papers = search_semantic_scholar(paper_title, limit=5, api_key=semantic_scholar_key)
        related_papers_md = format_semantic_scholar_results(related_papers)
    # Run the sequential multi-reviewer pipeline.
    progress(0.3, desc="Initializing reviewers...")
    try:
        reviewer_system = MultiReviewerSystem(
            api_key=final_api_key,
            base_url=final_base_url,
            model=final_model
        )

        def progress_callback(value, desc):
            # Map the pipeline's 0-1 progress onto the remaining 0.3-1.0 band.
            progress(0.3 + (value * 0.7), desc=desc)

        result = reviewer_system.review_paper_sequential(
            paper_text,
            progress_callback=progress_callback
        )
        summary = f"""
## Review Summary
**Average Score**: {result['average_score']:.2f}/10
**Successful Reviews**: {result['successful_reviews']}/{result['total_reviewers']}
---
"""
        # Collect up to three individual reviews, padding missing slots.
        reviews = ["", "", ""]
        for i, review_data in enumerate(result['reviews'][:3]):
            score = review_data['score']
            # Bug fix: explicit None check so a legitimate score of 0
            # is displayed instead of being shown as "N/A".
            score_text = f"{score:.2f}/10" if score is not None else 'N/A'
            reviews[i] = f"""
### {review_data['reviewer']}
**Score**: {score_text}
{review_data['review']}
---
"""
        return summary, reviews[0], reviews[1], reviews[2], related_papers_md
    except Exception as e:
        error_msg = f"Error during review process: {str(e)}"
        return error_msg, "", "", "", related_papers_md
# Create Gradio interface: two-column layout with upload controls on the
# left and tabbed review output on the right.
with gr.Blocks(title="AI Literature Review System", theme=gr.themes.Soft()) as demo:
    # Intro banner describing the app's capabilities.
    gr.Markdown("""
# π AI-Powered Literature Review System
Upload a research paper (PDF) and get comprehensive reviews from multiple AI agents with different perspectives.
## Features:
- **Multi-Agent Review**: Three specialized reviewers evaluate your paper sequentially
- **Comprehensive Analysis**: Originality, quality, clarity, significance, and more
- **Detailed Feedback**: Strengths, weaknesses, questions, and suggestions
- **Scoring System**: Based on top-tier conference standards (NeurIPS-style)
- **Semantic Scholar Integration**: Find related papers for comparison
""")
    with gr.Row():
        # Left column: upload widget, options, and reviewer descriptions.
        with gr.Column(scale=1):
            gr.Markdown("### π€ Upload Paper")
            # type="filepath" means review_paper receives a path string.
            pdf_input = gr.File(
                label="Upload Research Paper (PDF)",
                file_types=[".pdf"],
                type="filepath"
            )
            search_related_checkbox = gr.Checkbox(
                label="Search for related papers on Semantic Scholar",
                value=True,
                info="Find similar papers for comparison"
            )
            submit_btn = gr.Button("π Review Paper", variant="primary", size="lg")
            gr.Markdown("""
### π₯ Reviewers (Sequential):
1. **Experimentalist**: Methodology and results
2. **Impactist**: Impact and significance
3. **Novelty Seeker**: Originality and innovation
### β±οΈ Processing Time:
Expect 3-6 minutes for complete review
(Sequential processing with rate limiting)
""")
        # Right column: summary plus one tab per reviewer and related papers.
        with gr.Column(scale=2):
            gr.Markdown("### π Review Results")
            summary_output = gr.Markdown(label="Summary")
            with gr.Tabs():
                with gr.Tab("Reviewer 1: Experimentalist"):
                    review_1_output = gr.Markdown()
                with gr.Tab("Reviewer 2: Impactist"):
                    review_2_output = gr.Markdown()
                with gr.Tab("Reviewer 3: Novelty Seeker"):
                    review_3_output = gr.Markdown()
                with gr.Tab("Related Papers"):
                    related_papers_output = gr.Markdown()
    # Connect the button to the review function; output order must match
    # the 5-tuple returned by review_paper.
    submit_btn.click(
        fn=review_paper,
        inputs=[pdf_input, search_related_checkbox],
        outputs=[summary_output, review_1_output, review_2_output, review_3_output, related_papers_output]
    )
    # Usage instructions and score legend shown below the interface.
    gr.Markdown("""
---
### π How to Use:
1. Upload your research paper in PDF format
2. Optionally enable Semantic Scholar search for related papers
3. Click "Review Paper" and wait for the sequential multi-agent analysis (3-6 minutes)
4. Review the detailed feedback from all three reviewers
### π Score Interpretation:
- **9-10**: Award Quality / Strong Accept
- **7-8**: Accept
- **5-6**: Borderline
- **3-4**: Borderline Reject
- **1-2**: Reject
### β οΈ Notes:
- Reviews are generated **sequentially** (one at a time) with rate limiting
- Processing time: 3-6 minutes depending on paper length
- Ensure your PDF contains extractable text (not scanned images)
- All API credentials are pre-configured
""")
if __name__ == "__main__":
    # Launch locally; share=False keeps the app off Gradio's public tunnel.
    demo.launch(share=False)
|