File size: 9,589 Bytes
9a4a0bb
 
 
 
 
 
 
 
 
9c12608
 
 
 
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c12608
 
9a4a0bb
 
 
 
 
 
 
 
9c12608
 
 
 
 
 
 
 
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6760933
 
 
 
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c12608
 
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6760933
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6760933
 
 
9a4a0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6760933
9a4a0bb
 
 
 
 
 
6760933
 
 
 
9a4a0bb
 
 
 
 
 
 
 
 
6760933
 
9a4a0bb
6760933
9a4a0bb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
"""Gradio app for AI-powered literature review system with Semantic Scholar integration."""

import gradio as gr
import os
from typing import Optional, List, Dict
from markitdown import MarkItDown
from agents import MultiReviewerSystem
import requests
import time
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()


def extract_text_from_pdf(pdf_file) -> str:
    """Convert an uploaded PDF to plain text using markitdown.

    Returns an empty string when no file was provided, and an
    "Error ..." message string on conversion failure — callers detect
    failure with ``str.startswith("Error")``.
    """
    if pdf_file is None:
        return ""

    try:
        converted = MarkItDown().convert(pdf_file.name)
        return converted.text_content
    except Exception as exc:
        # Report the failure inline rather than raising; the caller
        # surfaces this string directly in the UI.
        return f"Error extracting text from PDF: {str(exc)}"


def search_semantic_scholar(query: str, limit: int = 5, api_key: Optional[str] = None) -> List[Dict]:
    """Search Semantic Scholar for papers matching *query*.

    Args:
        query: Free-text search query (here, the guessed paper title).
        limit: Maximum number of results to request.
        api_key: Optional Semantic Scholar API key, sent via the
            ``x-api-key`` header when provided.

    Returns:
        The list of paper dicts from the API response's ``data`` field,
        or an empty list on any failure. Errors are printed rather than
        raised so the review flow can continue without related papers.
    """
    try:
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": limit,
            "fields": "title,authors,year,abstract,citationCount,url,openAccessPdf"
        }

        headers = {}
        if api_key:
            headers["x-api-key"] = api_key

        # Rate limiting: stay at or below ~1 request per second.
        time.sleep(1)

        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()

        data = response.json()
        return data.get("data", [])

    except Exception as e:
        # Best-effort: related-paper search is optional, so swallow errors.
        print(f"Error searching Semantic Scholar: {e}")
        return []


def format_semantic_scholar_results(papers: List[Dict]) -> str:
    """Render Semantic Scholar search results as a Markdown section.

    Args:
        papers: Paper dicts as returned by the Graph API
            (``title``, ``authors``, ``year``, ``abstract``,
            ``citationCount``, ``url``, ``openAccessPdf``).

    Returns:
        A Markdown string listing each paper, or a short notice when
        *papers* is empty.

    Note:
        The API returns explicit ``null`` for missing fields (commonly
        ``abstract`` and ``openAccessPdf``); ``dict.get(key, default)``
        does not cover a present-but-None value, so ``or`` fallbacks are
        used to avoid ``TypeError`` on ``None`` slicing.
    """
    if not papers:
        return "No related papers found."

    parts = ["## πŸ“š Related Papers from Semantic Scholar\n\n"]

    for i, paper in enumerate(papers, 1):
        # `or` fallbacks guard against keys present with a None value.
        title = paper.get("title") or "N/A"
        authors = ", ".join(a.get("name", "") for a in (paper.get("authors") or []))
        year = paper.get("year") or "N/A"
        citations = paper.get("citationCount") or 0
        abstract = paper.get("abstract") or "No abstract available"
        url = paper.get("url") or ""
        pdf_url = paper.get("openAccessPdf") or {}

        parts.append(f"### {i}. {title}\n\n")
        parts.append(f"**Authors**: {authors}\n\n")
        parts.append(f"**Year**: {year} | **Citations**: {citations}\n\n")
        # Truncate long abstracts to keep the card compact.
        truncated = abstract[:300] + ('...' if len(abstract) > 300 else '')
        parts.append(f"**Abstract**: {truncated}\n\n")

        if url:
            parts.append(f"[View on Semantic Scholar]({url})")

        if pdf_url.get("url"):
            parts.append(f" | [Download PDF]({pdf_url['url']})")

        parts.append("\n\n---\n\n")

    return "".join(parts)


def extract_paper_title_from_text(text: str) -> str:
    """Heuristically pick the paper title from the document text.

    Scans the first 20 lines and returns the first one whose stripped
    length is title-like (between 21 and 199 characters); falls back to
    the generic "Research Paper" when nothing qualifies.
    """
    for candidate in text.split('\n')[:20]:
        candidate = candidate.strip()
        # A plausible title is neither a short fragment nor a paragraph.
        if 20 < len(candidate) < 200:
            return candidate
    return "Research Paper"


def review_paper(
    pdf_file,
    search_related: bool,
    progress=gr.Progress()
) -> tuple[str, str, str, str, str]:
    """Run the full pipeline: extract PDF text, optionally fetch related
    papers, then generate the sequential multi-agent review.

    Args:
        pdf_file: The uploaded file from ``gr.File`` (None when nothing
            was uploaded). The code reads ``pdf_file.name`` downstream.
        search_related: When True, query Semantic Scholar for related work.
        progress: Gradio progress tracker (gradio injects this per call).

    Returns:
        A 5-tuple of Markdown strings:
        ``(summary, review_1, review_2, review_3, related_papers)``.
        On failure, the first element carries the error message and the
        review slots are empty; related papers already fetched are kept.
    """

    if pdf_file is None:
        return "Please upload a PDF file.", "", "", "", ""

    # Get API credentials from environment variables (.env loaded at import)
    final_api_key = os.getenv("OPENAI_API_KEY", "")
    final_base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    final_model = os.getenv("MODEL_NAME", "gpt-3.5-turbo")

    if not final_api_key or final_api_key.strip() == "":
        return "Please provide an API key or set OPENAI_API_KEY environment variable.", "", "", "", ""

    # Extract text from PDF
    progress(0.1, desc="Extracting text from PDF...")
    paper_text = extract_text_from_pdf(pdf_file)

    # extract_text_from_pdf signals failure with an "Error ..." string.
    if paper_text.startswith("Error"):
        return paper_text, "", "", "", ""

    if len(paper_text.strip()) == 0:
        return "Could not extract text from PDF. The file might be empty or image-based.", "", "", "", ""

    # Search for related papers if requested
    related_papers_md = ""
    if search_related:
        progress(0.2, desc="Searching for related papers...")
        # Use a heuristic title guess from the text as the search query.
        paper_title = extract_paper_title_from_text(paper_text)
        semantic_scholar_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY", "")
        related_papers = search_semantic_scholar(paper_title, limit=5, api_key=semantic_scholar_key)
        related_papers_md = format_semantic_scholar_results(related_papers)

    # Initialize multi-reviewer system
    progress(0.3, desc="Initializing reviewers...")

    try:
        reviewer_system = MultiReviewerSystem(
            api_key=final_api_key,
            base_url=final_base_url,
            model=final_model
        )

        # Map the reviewer system's 0..1 progress into the remaining
        # 30%-100% of the overall progress bar.
        def progress_callback(value, desc):
            progress(0.3 + (value * 0.7), desc=desc)

        result = reviewer_system.review_paper_sequential(
            paper_text,
            progress_callback=progress_callback
        )

        # Format summary
        summary = f"""
## Review Summary

**Average Score**: {result['average_score']:.2f}/10
**Successful Reviews**: {result['successful_reviews']}/{result['total_reviewers']}

---
"""

        # Extract individual reviews into the three fixed output slots
        review_1 = ""
        review_2 = ""
        review_3 = ""

        # NOTE(review): a score of 0 would render as 'N/A' because this is a
        # truthiness check, not `is not None` — confirm scores are never 0.
        for i, review_data in enumerate(result['reviews']):
            score_text = f"{review_data['score']:.2f}/10" if review_data['score'] else 'N/A'
            review_text = f"""
### {review_data['reviewer']}

**Score**: {score_text}

{review_data['review']}

---
"""
            # Only the first three reviews are displayed; extras are dropped.
            if i == 0:
                review_1 = review_text
            elif i == 1:
                review_2 = review_text
            elif i == 2:
                review_3 = review_text

        return summary, review_1, review_2, review_3, related_papers_md

    except Exception as e:
        # Surface the failure in the summary slot but keep any related
        # papers that were already fetched.
        error_msg = f"Error during review process: {str(e)}"
        return error_msg, "", "", "", related_papers_md


# Create Gradio interface: declarative layout first, event wiring at the end.
with gr.Blocks(title="AI Literature Review System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # πŸ“š AI-Powered Literature Review System

    Upload a research paper (PDF) and get comprehensive reviews from multiple AI agents with different perspectives.

    ## Features:
    - **Multi-Agent Review**: Three specialized reviewers evaluate your paper sequentially
    - **Comprehensive Analysis**: Originality, quality, clarity, significance, and more
    - **Detailed Feedback**: Strengths, weaknesses, questions, and suggestions
    - **Scoring System**: Based on top-tier conference standards (NeurIPS-style)
    - **Semantic Scholar Integration**: Find related papers for comparison
    """)

    with gr.Row():
        # Left column: upload controls and static help text.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“€ Upload Paper")

            # NOTE(review): type="filepath" makes gradio pass a str path, but
            # review_paper/extract_text_from_pdf read pdf_file.name — confirm
            # the installed gradio version wraps this in a file-like object.
            pdf_input = gr.File(
                label="Upload Research Paper (PDF)",
                file_types=[".pdf"],
                type="filepath"
            )

            search_related_checkbox = gr.Checkbox(
                label="Search for related papers on Semantic Scholar",
                value=True,
                info="Find similar papers for comparison"
            )

            submit_btn = gr.Button("πŸ” Review Paper", variant="primary", size="lg")

            gr.Markdown("""
            ### πŸ‘₯ Reviewers (Sequential):
            1. **Experimentalist**: Methodology and results
            2. **Impactist**: Impact and significance
            3. **Novelty Seeker**: Originality and innovation

            ### ⏱️ Processing Time:
            Expect 3-6 minutes for complete review
            (Sequential processing with rate limiting)
            """)

        # Right column: review outputs, one tab per reviewer plus related work.
        with gr.Column(scale=2):
            gr.Markdown("### πŸ“Š Review Results")

            summary_output = gr.Markdown(label="Summary")

            with gr.Tabs():
                with gr.Tab("Reviewer 1: Experimentalist"):
                    review_1_output = gr.Markdown()

                with gr.Tab("Reviewer 2: Impactist"):
                    review_2_output = gr.Markdown()

                with gr.Tab("Reviewer 3: Novelty Seeker"):
                    review_3_output = gr.Markdown()

                with gr.Tab("Related Papers"):
                    related_papers_output = gr.Markdown()

    # Connect the button to the review function; output order must match
    # review_paper's 5-tuple return.
    submit_btn.click(
        fn=review_paper,
        inputs=[pdf_input, search_related_checkbox],
        outputs=[summary_output, review_1_output, review_2_output, review_3_output, related_papers_output]
    )

    gr.Markdown("""
    ---
    ### πŸ“– How to Use:
    1. Upload your research paper in PDF format
    2. Optionally enable Semantic Scholar search for related papers
    3. Click "Review Paper" and wait for the sequential multi-agent analysis (3-6 minutes)
    4. Review the detailed feedback from all three reviewers

    ### πŸ“Š Score Interpretation:
    - **9-10**: Award Quality / Strong Accept
    - **7-8**: Accept
    - **5-6**: Borderline
    - **3-4**: Borderline Reject
    - **1-2**: Reject

    ### ⚠️ Notes:
    - Reviews are generated **sequentially** (one at a time) with rate limiting
    - Processing time: 3-6 minutes depending on paper length
    - Ensure your PDF contains extractable text (not scanned images)
    - All API credentials are pre-configured
    """)


if __name__ == "__main__":
    # Launch the app locally; share=False keeps it off the public
    # gradio.live tunnel.
    demo.launch(share=False)