# AI Literature Review — Hugging Face Space (author: Nur Arifin Akbar)
# Latest commit 6760933: Remove API configuration forms from frontend - use environment variables only
"""Gradio app for AI-powered literature review system with Semantic Scholar integration."""

import gradio as gr
import os
from typing import Optional, List, Dict
from markitdown import MarkItDown
from agents import MultiReviewerSystem  # project-local multi-agent reviewer
import requests
import time
from dotenv import load_dotenv

# Load environment variables from .env file (OPENAI_API_KEY, OPENAI_BASE_URL,
# MODEL_NAME, SEMANTIC_SCHOLAR_API_KEY are read later via os.getenv).
load_dotenv()
def extract_text_from_pdf(pdf_file) -> str:
    """Extract text content from a PDF file using markitdown.

    Args:
        pdf_file: Either a filesystem path string (what ``gr.File`` with
            ``type="filepath"`` delivers) or a file-like object exposing a
            ``.name`` attribute. ``None`` yields an empty string.

    Returns:
        The extracted text, or a string starting with ``"Error"`` on any
        failure (callers check ``startswith("Error")`` — do not change
        that prefix).
    """
    try:
        if pdf_file is None:
            return ""
        # gr.File(type="filepath") passes a plain path string; the previous
        # code assumed an object with a .name attribute and raised
        # AttributeError on str input. Support both shapes.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        md = MarkItDown()
        result = md.convert(path)
        return result.text_content
    except Exception as e:
        # Best-effort: surface failure as a sentinel string rather than
        # raising, so the UI can display the message.
        return f"Error extracting text from PDF: {str(e)}"
def search_semantic_scholar(query: str, limit: int = 5, api_key: Optional[str] = None) -> List[Dict]:
    """Search for related papers on Semantic Scholar with rate limiting.

    Args:
        query: Free-text search query (typically the paper title).
        limit: Maximum number of results to request.
        api_key: Optional Semantic Scholar API key, sent via the
            ``x-api-key`` header when provided. (Annotation fixed from
            ``str`` to ``Optional[str]`` — the default is ``None``.)

    Returns:
        A list of paper dicts from the API response's ``data`` field, or an
        empty list on any failure.
    """
    try:
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": limit,
            "fields": "title,authors,year,abstract,citationCount,url,openAccessPdf"
        }
        headers = {}
        if api_key:
            headers["x-api-key"] = api_key
        # Rate limiting: 1 request per second (the unauthenticated public
        # API budget), applied unconditionally before each call.
        time.sleep(1)
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        return data.get("data", [])
    except Exception as e:
        # Deliberately broad: related-paper search is an optional feature,
        # so any network/HTTP/JSON failure degrades to "no results".
        print(f"Error searching Semantic Scholar: {e}")
        return []
def format_semantic_scholar_results(papers: List[Dict]) -> str:
    """Format Semantic Scholar results for display.

    Args:
        papers: Paper dicts as returned by the Semantic Scholar search API.

    Returns:
        A markdown string listing each paper, or a placeholder message when
        the list is empty.
    """
    if not papers:
        return "No related papers found."

    formatted = "## π Related Papers from Semantic Scholar\n\n"
    for i, paper in enumerate(papers, 1):
        # The API returns explicit null (None) for missing fields, so a
        # plain .get(key, default) is not enough — the default only applies
        # when the key is absent. Use `or` fallbacks to avoid TypeError on
        # len(None) and AttributeError on None.get(...).
        title = paper.get("title") or "N/A"
        authors = ", ".join(a.get("name", "") for a in (paper.get("authors") or []))
        year = paper.get("year") or "N/A"
        citations = paper.get("citationCount") or 0
        abstract = paper.get("abstract") or "No abstract available"
        url = paper.get("url") or ""
        pdf_url = paper.get("openAccessPdf") or {}

        formatted += f"### {i}. {title}\n\n"
        formatted += f"**Authors**: {authors}\n\n"
        formatted += f"**Year**: {year} | **Citations**: {citations}\n\n"
        # Truncate long abstracts to 300 characters with an ellipsis.
        formatted += f"**Abstract**: {abstract[:300]}{'...' if len(abstract) > 300 else ''}\n\n"
        if url:
            formatted += f"[View on Semantic Scholar]({url})"
        if pdf_url and pdf_url.get("url"):
            formatted += f" | [Download PDF]({pdf_url['url']})"
        formatted += "\n\n---\n\n"
    return formatted
def extract_paper_title_from_text(text: str) -> str:
    """Extract paper title from the beginning of the text.

    Scans the first 20 lines and returns the first one whose stripped
    length is strictly between 20 and 200 characters (a heuristic for a
    plausible title). Falls back to "Research Paper" when nothing matches.
    """
    candidates = (raw.strip() for raw in text.split('\n')[:20])
    for candidate in candidates:
        if 20 < len(candidate) < 200:
            return candidate
    return "Research Paper"
def review_paper(
    pdf_file,
    search_related: bool,
    progress=gr.Progress()
) -> tuple[str, str, str, str, str]:
    """Main function to process PDF and generate reviews.

    Args:
        pdf_file: Uploaded PDF (filepath from gr.File) or None.
        search_related: When True, also query Semantic Scholar for related papers.
        progress: Gradio progress tracker. The gr.Progress() default is the
            framework's documented injection idiom, not an accidental
            mutable default.

    Returns:
        A 5-tuple of markdown strings:
        (summary, review_1, review_2, review_3, related_papers_md).
        On failure the first element carries the error message.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", "", "", "", ""

    # Get API credentials from environment variables (configuration forms
    # were removed from the frontend; env vars are the only source).
    final_api_key = os.getenv("OPENAI_API_KEY", "")
    final_base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    final_model = os.getenv("MODEL_NAME", "gpt-3.5-turbo")

    if not final_api_key or final_api_key.strip() == "":
        return "Please provide an API key or set OPENAI_API_KEY environment variable.", "", "", "", ""

    # Extract text from PDF
    progress(0.1, desc="Extracting text from PDF...")
    paper_text = extract_text_from_pdf(pdf_file)

    # extract_text_from_pdf signals failure with an "Error..." prefix.
    if paper_text.startswith("Error"):
        return paper_text, "", "", "", ""

    if len(paper_text.strip()) == 0:
        return "Could not extract text from PDF. The file might be empty or image-based.", "", "", "", ""

    # Search for related papers if requested (best-effort: helpers return
    # empty results on failure, so this never aborts the review).
    related_papers_md = ""
    if search_related:
        progress(0.2, desc="Searching for related papers...")
        paper_title = extract_paper_title_from_text(paper_text)
        semantic_scholar_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY", "")
        related_papers = search_semantic_scholar(paper_title, limit=5, api_key=semantic_scholar_key)
        related_papers_md = format_semantic_scholar_results(related_papers)

    # Initialize multi-reviewer system
    progress(0.3, desc="Initializing reviewers...")
    try:
        reviewer_system = MultiReviewerSystem(
            api_key=final_api_key,
            base_url=final_base_url,
            model=final_model
        )

        # Generate reviews. The reviewer's own progress (0..1) is mapped
        # into the remaining 0.3..1.0 span of the overall progress bar.
        def progress_callback(value, desc):
            progress(0.3 + (value * 0.7), desc=desc)

        result = reviewer_system.review_paper_sequential(
            paper_text,
            progress_callback=progress_callback
        )

        # Format summary
        summary = f"""
## Review Summary
**Average Score**: {result['average_score']:.2f}/10
**Successful Reviews**: {result['successful_reviews']}/{result['total_reviewers']}
---
"""

        # Extract individual reviews into the three fixed output slots.
        review_1 = ""
        review_2 = ""
        review_3 = ""

        for i, review_data in enumerate(result['reviews']):
            # NOTE(review): a literal score of 0 renders as 'N/A' here
            # (falsy check) — presumably scores are 1-10; confirm in agents.
            score_text = f"{review_data['score']:.2f}/10" if review_data['score'] else 'N/A'
            review_text = f"""
### {review_data['reviewer']}
**Score**: {score_text}
{review_data['review']}
---
"""
            if i == 0:
                review_1 = review_text
            elif i == 1:
                review_2 = review_text
            elif i == 2:
                review_3 = review_text

        return summary, review_1, review_2, review_3, related_papers_md

    except Exception as e:
        # Boundary handler for the UI: any reviewer failure becomes a
        # displayable message; related papers (if fetched) are still shown.
        error_msg = f"Error during review process: {str(e)}"
        return error_msg, "", "", "", related_papers_md
# Create Gradio interface
# NOTE(review): the "π"/"β" sequences in the UI strings below are
# mojibake of emoji from the original source; preserved byte-for-byte
# since they are runtime display strings.
with gr.Blocks(title="AI Literature Review System", theme=gr.themes.Soft()) as demo:
    # Header / feature overview
    gr.Markdown("""
# π AI-Powered Literature Review System
Upload a research paper (PDF) and get comprehensive reviews from multiple AI agents with different perspectives.
## Features:
- **Multi-Agent Review**: Three specialized reviewers evaluate your paper sequentially
- **Comprehensive Analysis**: Originality, quality, clarity, significance, and more
- **Detailed Feedback**: Strengths, weaknesses, questions, and suggestions
- **Scoring System**: Based on top-tier conference standards (NeurIPS-style)
- **Semantic Scholar Integration**: Find related papers for comparison
""")

    with gr.Row():
        # Left column: inputs (PDF upload, options, submit).
        with gr.Column(scale=1):
            gr.Markdown("### π€ Upload Paper")
            pdf_input = gr.File(
                label="Upload Research Paper (PDF)",
                file_types=[".pdf"],
                type="filepath"
            )
            search_related_checkbox = gr.Checkbox(
                label="Search for related papers on Semantic Scholar",
                value=True,
                info="Find similar papers for comparison"
            )
            submit_btn = gr.Button("π Review Paper", variant="primary", size="lg")
            gr.Markdown("""
### π₯ Reviewers (Sequential):
1. **Experimentalist**: Methodology and results
2. **Impactist**: Impact and significance
3. **Novelty Seeker**: Originality and innovation
### β±οΈ Processing Time:
Expect 3-6 minutes for complete review
(Sequential processing with rate limiting)
""")

        # Right column: review outputs, one tab per reviewer plus related papers.
        with gr.Column(scale=2):
            gr.Markdown("### π Review Results")
            summary_output = gr.Markdown(label="Summary")
            with gr.Tabs():
                with gr.Tab("Reviewer 1: Experimentalist"):
                    review_1_output = gr.Markdown()
                with gr.Tab("Reviewer 2: Impactist"):
                    review_2_output = gr.Markdown()
                with gr.Tab("Reviewer 3: Novelty Seeker"):
                    review_3_output = gr.Markdown()
                with gr.Tab("Related Papers"):
                    related_papers_output = gr.Markdown()

    # Connect the button to the review function; output order must match
    # review_paper's 5-tuple return.
    submit_btn.click(
        fn=review_paper,
        inputs=[pdf_input, search_related_checkbox],
        outputs=[summary_output, review_1_output, review_2_output, review_3_output, related_papers_output]
    )

    # Footer: usage instructions and score legend.
    gr.Markdown("""
---
### π How to Use:
1. Upload your research paper in PDF format
2. Optionally enable Semantic Scholar search for related papers
3. Click "Review Paper" and wait for the sequential multi-agent analysis (3-6 minutes)
4. Review the detailed feedback from all three reviewers
### π Score Interpretation:
- **9-10**: Award Quality / Strong Accept
- **7-8**: Accept
- **5-6**: Borderline
- **3-4**: Borderline Reject
- **1-2**: Reject
### β οΈ Notes:
- Reviews are generated **sequentially** (one at a time) with rate limiting
- Processing time: 3-6 minutes depending on paper length
- Ensure your PDF contains extractable text (not scanned images)
- All API credentials are pre-configured
""")

if __name__ == "__main__":
    # Local launch only; no public share link.
    demo.launch(share=False)