# PaperReview / app.py
# Author: Nur Arifin Akbar
# Commit 6760933: Remove API configuration forms from frontend - use
# environment variables only
"""Gradio app for AI-powered literature review system with Semantic Scholar integration."""
import gradio as gr
import os
from typing import Optional, List, Dict
from markitdown import MarkItDown
from agents import MultiReviewerSystem
import requests
import time
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
def extract_text_from_pdf(pdf_file) -> str:
    """Extract text content from a PDF file using markitdown.

    Args:
        pdf_file: A filesystem path (str, as gr.File(type="filepath")
            passes) or a file-like object exposing a ``.name`` attribute
            (as older Gradio versions pass), or None.

    Returns:
        The extracted text, an empty string when no file is given, or a
        string starting with "Error" on failure (callers detect failure
        via this prefix).
    """
    try:
        if pdf_file is None:
            return ""
        # gr.File(type="filepath") hands us a plain path string, which has
        # no .name attribute; only tempfile-wrapper objects do.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        md = MarkItDown()
        result = md.convert(path)
        return result.text_content
    except Exception as e:
        # Best-effort: signal failure through the "Error" prefix contract.
        return f"Error extracting text from PDF: {str(e)}"
def search_semantic_scholar(query: str, limit: int = 5, api_key: Optional[str] = None) -> List[Dict]:
    """Search for related papers on Semantic Scholar with rate limiting.

    Args:
        query: Free-text search query (typically the paper title).
        limit: Maximum number of results to request.
        api_key: Optional Semantic Scholar API key, sent via the
            ``x-api-key`` header when provided.

    Returns:
        A list of paper records (dicts) from the graph API, or an empty
        list on any request/parsing failure (search is best-effort).
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,year,abstract,citationCount,url,openAccessPdf"
    }
    headers = {"x-api-key": api_key} if api_key else {}
    # Crude rate limiting: the public API allows roughly 1 request/second.
    time.sleep(1)
    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/HTTP failures (raise_for_status
        # raises HTTPError, a subclass); ValueError covers JSON decoding.
        # Degrade gracefully instead of aborting the review pipeline.
        print(f"Error searching Semantic Scholar: {e}")
        return []
    return data.get("data", [])
def format_semantic_scholar_results(papers: List[Dict]) -> str:
    """Format Semantic Scholar search results as a Markdown document.

    Args:
        papers: Paper records from the Semantic Scholar graph API; any
            field value may be an explicit None.

    Returns:
        A Markdown string listing each paper, or a short notice when the
        list is empty.
    """
    if not papers:
        return "No related papers found."

    sections = ["## πŸ“š Related Papers from Semantic Scholar\n\n"]
    for i, paper in enumerate(papers, 1):
        title = paper.get("title") or "N/A"
        authors = ", ".join(a.get("name", "") for a in paper.get("authors") or [])
        year = paper.get("year", "N/A")
        citations = paper.get("citationCount", 0)
        # The API returns an explicit null for missing abstracts, so the
        # key exists and get(..., default) alone would still yield None
        # and crash the slice/len below.
        abstract = paper.get("abstract") or "No abstract available"
        url = paper.get("url", "")
        # openAccessPdf may likewise be present but null.
        pdf_info = paper.get("openAccessPdf") or {}

        entry = f"### {i}. {title}\n\n"
        entry += f"**Authors**: {authors}\n\n"
        entry += f"**Year**: {year} | **Citations**: {citations}\n\n"
        entry += f"**Abstract**: {abstract[:300]}{'...' if len(abstract) > 300 else ''}\n\n"
        if url:
            entry += f"[View on Semantic Scholar]({url})"
        if pdf_info.get("url"):
            entry += f" | [Download PDF]({pdf_info['url']})"
        entry += "\n\n---\n\n"
        sections.append(entry)
    # Single join instead of repeated string concatenation.
    return "".join(sections)
def extract_paper_title_from_text(text: str) -> str:
    """Heuristically pick the paper title from the start of extracted text.

    Scans the first 20 lines and returns the first one whose stripped
    length lies strictly between 20 and 200 characters (a plausible title
    length); falls back to the generic "Research Paper".
    """
    for raw_line in text.split('\n')[:20]:
        candidate = raw_line.strip()
        if 20 < len(candidate) < 200:
            return candidate
    return "Research Paper"
def review_paper(
    pdf_file,
    search_related: bool,
    progress=gr.Progress()
) -> tuple[str, str, str, str, str]:
    """Process an uploaded PDF and generate multi-agent reviews.

    Args:
        pdf_file: Uploaded PDF (path string from gr.File, or None).
        search_related: When True, also query Semantic Scholar for
            related papers using a title guessed from the PDF text.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A 5-tuple of Markdown strings: (summary, review 1, review 2,
        review 3, related papers). On failure the summary slot carries
        the error message and the review slots are empty strings.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", "", "", "", ""

    # API credentials come exclusively from environment variables
    # (the frontend intentionally exposes no credential fields).
    final_api_key = os.getenv("OPENAI_API_KEY", "")
    final_base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    final_model = os.getenv("MODEL_NAME", "gpt-3.5-turbo")
    if not final_api_key.strip():
        return "Please provide an API key or set OPENAI_API_KEY environment variable.", "", "", "", ""

    # Extract text from the PDF; extract_text_from_pdf signals failure
    # with an "Error"-prefixed string.
    progress(0.1, desc="Extracting text from PDF...")
    paper_text = extract_text_from_pdf(pdf_file)
    if paper_text.startswith("Error"):
        return paper_text, "", "", "", ""
    if not paper_text.strip():
        return "Could not extract text from PDF. The file might be empty or image-based.", "", "", "", ""

    # Optionally fetch related papers (best-effort; failures yield an
    # empty result list, not an exception).
    related_papers_md = ""
    if search_related:
        progress(0.2, desc="Searching for related papers...")
        paper_title = extract_paper_title_from_text(paper_text)
        semantic_scholar_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY", "")
        related_papers = search_semantic_scholar(paper_title, limit=5, api_key=semantic_scholar_key)
        related_papers_md = format_semantic_scholar_results(related_papers)

    # Run the sequential multi-reviewer pipeline.
    progress(0.3, desc="Initializing reviewers...")
    try:
        reviewer_system = MultiReviewerSystem(
            api_key=final_api_key,
            base_url=final_base_url,
            model=final_model
        )

        def progress_callback(value, desc):
            # Map the reviewer system's 0-1 progress onto the 0.3-1.0
            # tail of the overall progress bar.
            progress(0.3 + (value * 0.7), desc=desc)

        result = reviewer_system.review_paper_sequential(
            paper_text,
            progress_callback=progress_callback
        )

        summary = f"""
## Review Summary
**Average Score**: {result['average_score']:.2f}/10
**Successful Reviews**: {result['successful_reviews']}/{result['total_reviewers']}
---
"""

        # Format each individual review into one of exactly three output
        # slots (padded with "" when fewer reviews came back), replacing
        # the original i == 0/1/2 if/elif chain.
        review_slots = ["", "", ""]
        for i, review_data in enumerate(result['reviews'][:3]):
            score_text = f"{review_data['score']:.2f}/10" if review_data['score'] else 'N/A'
            review_slots[i] = f"""
### {review_data['reviewer']}
**Score**: {score_text}
{review_data['review']}
---
"""
        return summary, review_slots[0], review_slots[1], review_slots[2], related_papers_md
    except Exception as e:
        # Surface the failure in the summary slot; keep any related-paper
        # results that were already fetched.
        return f"Error during review process: {str(e)}", "", "", "", related_papers_md
# ---------------------------------------------------------------------------
# Gradio interface: left column for upload/controls, right column for the
# review outputs, wired to review_paper via the submit button.
# ---------------------------------------------------------------------------
with gr.Blocks(title="AI Literature Review System", theme=gr.themes.Soft()) as demo:
    # Intro / feature overview shown at the top of the page.
    gr.Markdown("""
# πŸ“š AI-Powered Literature Review System
Upload a research paper (PDF) and get comprehensive reviews from multiple AI agents with different perspectives.
## Features:
- **Multi-Agent Review**: Three specialized reviewers evaluate your paper sequentially
- **Comprehensive Analysis**: Originality, quality, clarity, significance, and more
- **Detailed Feedback**: Strengths, weaknesses, questions, and suggestions
- **Scoring System**: Based on top-tier conference standards (NeurIPS-style)
- **Semantic Scholar Integration**: Find related papers for comparison
""")
    with gr.Row():
        # Left column: upload widget, search toggle, and reviewer info.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“€ Upload Paper")
            pdf_input = gr.File(
                label="Upload Research Paper (PDF)",
                file_types=[".pdf"],
                type="filepath"
            )
            search_related_checkbox = gr.Checkbox(
                label="Search for related papers on Semantic Scholar",
                value=True,
                info="Find similar papers for comparison"
            )
            submit_btn = gr.Button("πŸ” Review Paper", variant="primary", size="lg")
            gr.Markdown("""
### πŸ‘₯ Reviewers (Sequential):
1. **Experimentalist**: Methodology and results
2. **Impactist**: Impact and significance
3. **Novelty Seeker**: Originality and innovation
### ⏱️ Processing Time:
Expect 3-6 minutes for complete review
(Sequential processing with rate limiting)
""")
        # Right column: summary plus one tab per reviewer and a tab for
        # Semantic Scholar results.
        with gr.Column(scale=2):
            gr.Markdown("### πŸ“Š Review Results")
            summary_output = gr.Markdown(label="Summary")
            with gr.Tabs():
                with gr.Tab("Reviewer 1: Experimentalist"):
                    review_1_output = gr.Markdown()
                with gr.Tab("Reviewer 2: Impactist"):
                    review_2_output = gr.Markdown()
                with gr.Tab("Reviewer 3: Novelty Seeker"):
                    review_3_output = gr.Markdown()
                with gr.Tab("Related Papers"):
                    related_papers_output = gr.Markdown()
    # Connect the button to the review function; the five outputs map 1:1
    # to the 5-tuple returned by review_paper.
    submit_btn.click(
        fn=review_paper,
        inputs=[pdf_input, search_related_checkbox],
        outputs=[summary_output, review_1_output, review_2_output, review_3_output, related_papers_output]
    )
    # Usage instructions and score legend at the bottom of the page.
    gr.Markdown("""
---
### πŸ“– How to Use:
1. Upload your research paper in PDF format
2. Optionally enable Semantic Scholar search for related papers
3. Click "Review Paper" and wait for the sequential multi-agent analysis (3-6 minutes)
4. Review the detailed feedback from all three reviewers
### πŸ“Š Score Interpretation:
- **9-10**: Award Quality / Strong Accept
- **7-8**: Accept
- **5-6**: Borderline
- **3-4**: Borderline Reject
- **1-2**: Reject
### ⚠️ Notes:
- Reviews are generated **sequentially** (one at a time) with rate limiting
- Processing time: 3-6 minutes depending on paper length
- Ensure your PDF contains extractable text (not scanned images)
- All API credentials are pre-configured
""")

if __name__ == "__main__":
    # Local launch only; share=False keeps the app off Gradio's public tunnel.
    demo.launch(share=False)