"""NOFO <-> research paper comparator: a Gradio UI around a five-task LLM pipeline.

The pipeline extracts the funding topic from a NOFO PDF, checks whether an
uploaded research paper is relevant to it, and, when it is, generates project
ideas, a full proposal for the first idea, and a JSON review of that proposal.
"""

import json
import os
import tempfile
from typing import Any, Dict, Tuple

import gradio as gr
from pypdf import PdfReader

try:
    from openai import OpenAI
except Exception:
    OpenAI = None  # handled at runtime

DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")

# PDFs yielding less extracted text than this are rejected as non-text PDFs.
MIN_PDF_TEXT_CHARS = 600

# Sentinel the Task 2 prompt asks the model to emit for an unrelated paper.
NOT_RELATED_MARKER = "PAPER NOT RELATED TO TOPIC"
# Placeholder stored for Tasks 3-5 when they are skipped.
SKIPPED_MESSAGE = "SKIPPED - PAPER NOT RELATED TO TOPIC"

PROMPT_TASK_1 = """You are an expert research analyst.

You will be given the full text of a Notice of Funding Opportunity (NOFO).
Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.

Instructions:
- Read the NOFO carefully.
- Determine the single, highest-level topic that best represents the purpose of the funding.
- Respond with the topic name ONLY.
- Do NOT include explanations, descriptions, bullet points, or additional text.
- Do NOT include punctuation before or after the topic.

Output Format:
"""

PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.

You will be provided with:
1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper

Your task is to determine whether the research paper is RELEVANT to the NOFO topic.

Relevance Criteria:
- Domain alignment (field, application area, or population)
- Methodological alignment (approaches, models, techniques)
- Theoretical or conceptual alignment
- Practical applicability to the NOFO’s funding objectives

Instructions:
- Carefully analyze the research paper in relation to the NOFO topic.
- If the paper does NOT meaningfully align by domain, method, theory, or application:
  → Respond with exactly:
    PAPER NOT RELATED TO TOPIC
- If the paper IS relevant:
  → Provide a concise summary of the paper in under 300 words.
  → Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.

Output Format:
### Output Format:
"summary": ""
"""

PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.

You will be provided with:
1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper that has been determined to be RELEVANT to the NOFO

Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.

Instructions:
- Generate EXACTLY five (5) distinct research project ideas.
- Each idea must clearly align with the NOFO topic and objectives.
- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.

For EACH idea, include the following fields in the exact order shown:
1. Title – a concise project title
2. Description – a clear explanation of the project idea (3–5 sentences)
3. Citation – author(s) and year of the research paper
4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
5. Research Paper Path – the file path or identifier for the paper used

Formatting Requirements:
- Separate each project idea using exactly three dashes: ---
- Do NOT add numbering or bullet points.
- Do NOT include any text before the first idea or after the last idea.
- Maintain consistent formatting across all five ideas.

Output Format (repeat exactly five times):
Title:
Description:
Citation:
NOFO Alignment:
Research Paper Path:
"""

PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO) topic and objectives
2. ONE selected research project idea generated in Task 3
3. The research paper that supports this idea

Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.

Instructions:
- Write the proposal in a formal grant-writing style.
- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
- Clearly show how the proposal builds upon and extends the provided research paper.
- Assume the audience is a technical and scientific review panel.

Required Proposal Sections (use these exact headings):
1. Project Title
2. Abstract (250–300 words)
3. Background and Significance
   - Problem statement
   - Prior work and gaps
   - Relevance to the NOFO topic
4. Objectives and Specific Aims
5. Research Methodology
   - Study design
   - Data sources
   - Models, tools, or interventions
6. Innovation
   - What is novel compared to existing work
7. Expected Outcomes and Impact
   - Scientific, societal, or clinical impact
8. Evaluation and Validation Plan
   - Metrics, benchmarks, or success criteria
9. Timeline and Milestones
   - Phased plan over the project duration
10. Risk Management and Mitigation
11. Alignment with NOFO Priorities
    - Explicit mapping to funding objectives
12. References
    - Include the provided research paper

Constraints:
- Keep the proposal concise but complete.
- Avoid generic filler language.
- Ensure internal consistency across sections.
- Do NOT include budget or personnel details unless explicitly stated in the NOFO.

Output Requirements:
- Use clear section headings exactly as listed.
- Do NOT include commentary, analysis, or explanations outside the proposal text.
- Return the proposal text ONLY.
"""

PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO)
2. A complete research proposal written in response to the NOFO

Your task is to critically evaluate the proposal using standard peer-review criteria.

Evaluation Criteria:
Evaluate the proposal on the following four dimensions:
1. Innovation
2. Significance
3. Approach
4. Investigator Expertise

Scoring Instructions:
- Assign a numeric score from 1 to 5 for EACH criterion:
  1 = Poor
  2 = Fair
  3 = Good
  4 = Very Good
  5 = Excellent
- Scores must reflect how competitive the proposal would be in a real funding review.
- Be objective, specific, and evidence-based.

For EACH criterion, provide:
- Score (integer 1–5)
- Justification (3–5 sentences)
- Key Strengths (bullet list)
- Key Weaknesses (bullet list)
- Actionable Recommendations for Improvement

Output Format:
Return VALID JSON only.
Do NOT include markdown, explanations, or additional text.

Use the following JSON structure exactly:
{
  "Innovation": {
    "score": <1-5>,
    "justification": "",
    "strengths": ["", ""],
    "weaknesses": ["", ""],
    "recommendations": ["", ""]
  },
  "Significance": {
    "score": <1-5>,
    "justification": "",
    "strengths": ["", ""],
    "weaknesses": ["", ""],
    "recommendations": ["", ""]
  },
  "Approach": {
    "score": <1-5>,
    "justification": "",
    "strengths": ["", ""],
    "weaknesses": ["", ""],
    "recommendations": ["", ""]
  },
  "Investigator Expertise": {
    "score": <1-5>,
    "justification": "",
    "strengths": ["", ""],
    "weaknesses": ["", ""],
    "recommendations": ["", ""]
  }
}
"""

SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."


def extract_text_from_pdf(pdf_path: str, min_chars: int = MIN_PDF_TEXT_CHARS) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Pages whose extraction fails or yields only whitespace are skipped; the
    remaining page texts are joined with blank lines.

    Args:
        pdf_path: Filesystem path of the PDF to read.
        min_chars: Minimum length of the extracted text for it to be accepted.

    Raises:
        ValueError: If fewer than *min_chars* characters could be extracted.
    """
    reader = PdfReader(pdf_path)
    parts = []
    for page in reader.pages:
        try:
            page_text = page.extract_text() or ""
        except Exception:
            # Deliberate best effort: one unreadable page must not abort the
            # whole document; the length check below catches total failure.
            page_text = ""
        if page_text.strip():
            parts.append(page_text)

    text = "\n\n".join(parts).strip()
    if len(text) < min_chars:
        raise ValueError(
            "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
        )
    return text


def openai_chat(api_key: str, system: str, user: str, model: str) -> str:
    """Send one system + user message pair to the chat completions API.

    Args:
        api_key: OpenAI API key; surrounding whitespace is stripped.
        system: Content of the system message.
        user: Content of the user message.
        model: Model name passed to the API.

    Returns:
        The stripped content of the first choice, or "" when it has none.

    Raises:
        RuntimeError: If the ``openai`` package could not be imported.
        ValueError: If *api_key* is empty or whitespace only.
    """
    if OpenAI is None:
        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")
    if not api_key or not api_key.strip():
        raise ValueError("Please provide an OpenAI API key.")

    client = OpenAI(api_key=api_key.strip())
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.2,
    )
    return (resp.choices[0].message.content or "").strip()


def _first_idea(ideas_text: str) -> str:
    """Return the first non-empty idea from the Task 3 output.

    Ideas are split on a ``---`` line when one exists, otherwise on any
    ``---`` occurrence. Empty leading segments (output that starts with a
    separator) are skipped so Task 4 never receives an empty idea.
    """
    separator = "\n---\n" if "\n---\n" in ideas_text else "---"
    for chunk in ideas_text.split(separator):
        chunk = chunk.strip()
        if chunk:
            return chunk
    return ideas_text.strip()


def _parse_review_json(raw: str) -> Tuple[Any, str]:
    """Parse the Task 5 output into a JSON value for display.

    Tries the whole string first, then the span from the first ``{`` to the
    last ``}`` (to tolerate text around the JSON object).

    Returns:
        ``(view, raw)`` where *view* is the parsed value, or an error dict
        when nothing parses, and *raw* is the text that was parsed (the
        trimmed candidate when only the inner span was valid).
    """
    try:
        return json.loads(raw), raw
    except json.JSONDecodeError:
        pass

    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        candidate = raw[start : end + 1]
        try:
            return json.loads(candidate), candidate
        except json.JSONDecodeError:
            pass

    return {"error": "Invalid JSON returned by model", "raw": raw}, raw


def run_pipeline(
    api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str
) -> Tuple[str, str, str, str, Any, str]:
    """Run Tasks 1-5 and write all results to a temporary JSON file.

    Tasks 3-5 are skipped when the Task 2 output contains the
    "PAPER NOT RELATED TO TOPIC" marker.

    Args:
        api_key: OpenAI API key.
        model: Model name used for every task.
        nofo_text: Extracted text of the NOFO.
        paper_text: Extracted text of the research paper.
        paper_path: Path or identifier of the paper, passed to Task 3;
            "uploaded_paper.pdf" is used when it is empty.

    Returns:
        ``(topic, task2, task3, task4, task5_view, results_path)`` where
        *task5_view* is a JSON-serialisable value for the JSON viewer and
        *results_path* is the path of the written results file.
    """
    # Task 1
    topic = openai_chat(api_key, SYSTEM_BASE, PROMPT_TASK_1 + "\n\nNOFO TEXT:\n\n" + nofo_text, model)
    topic = " ".join(topic.split())  # collapse internal whitespace/newlines

    # Task 2
    t2_out = openai_chat(
        api_key,
        SYSTEM_BASE,
        PROMPT_TASK_2 + "\n\nNOFO TOPIC:\n" + topic + "\n\nRESEARCH PAPER TEXT:\n\n" + paper_text,
        model,
    ).strip()

    if NOT_RELATED_MARKER in t2_out:
        t3_out = SKIPPED_MESSAGE
        t4_out = SKIPPED_MESSAGE
        t5_raw = SKIPPED_MESSAGE
        # The JSON viewer needs a dict/list or a valid JSON string; the bare
        # placeholder text is neither, so wrap it.
        t5_view: Any = {"status": SKIPPED_MESSAGE}
    else:
        # Task 3
        t3_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_3
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nRESEARCH PAPER PATH:\n"
            + (paper_path or "uploaded_paper.pdf"),
            model,
        ).strip()

        # Task 4 is written for the first idea only.
        first_idea = _first_idea(t3_out)
        t4_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_4
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nSELECTED IDEA (FROM TASK 3):\n\n"
            + first_idea
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nNOFO TEXT:\n\n"
            + nofo_text,
            model,
        ).strip()

        # Task 5
        t5_raw = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_5 + "\n\nNOFO TEXT:\n\n" + nofo_text + "\n\nPROPOSAL TEXT:\n\n" + t4_out,
            model,
        ).strip()
        t5_view, t5_raw = _parse_review_json(t5_raw)

    results: Dict[str, Any] = {
        "model": model,
        "task_1_topic": topic,
        "task_2_relevance_summary": t2_out,
        "task_3_project_ideas": t3_out,
        "task_4_full_proposal": t4_out,
        "task_5_review_scores_json": t5_raw,
    }

    # delete=False: the file must outlive this function so it can be downloaded.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", prefix="nofo_paper_results_", suffix=".json", delete=False
    ) as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
        out_path = f.name

    return topic, t2_out, t3_out, t4_out, t5_view, out_path


def _upload_path(upload: Any) -> str:
    """Return the filesystem path of an uploaded file.

    Accepts either an object exposing the path as ``.name`` or a plain path
    string.
    """
    return getattr(upload, "name", upload)


def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
    """Click handler: refresh cached PDF texts, then run the pipeline.

    Newly uploaded files replace the cached texts held in the state values;
    when no new file is supplied the cached text is reused.

    Returns:
        A 9-tuple matching the click outputs: the five task results, the
        results file path (or None), and the three updated state values.

    Raises:
        gr.Error: Carrying the message of any ValueError/RuntimeError raised
            while reading the PDFs or calling the model, so the message is
            shown in the UI.
    """
    try:
        # Update cached texts if new files are uploaded
        if nofo_file is not None:
            nofo_state = extract_text_from_pdf(_upload_path(nofo_file))
        if paper_file is not None:
            paper_state = extract_text_from_pdf(_upload_path(paper_file))
            paper_path_state = _upload_path(paper_file)

        if not (nofo_state or "").strip():
            return (
                "",
                "ERROR: Upload a NOFO PDF.",
                "",
                "",
                {"error": "missing NOFO"},
                None,
                nofo_state,
                paper_state,
                paper_path_state,
            )
        if not (paper_state or "").strip():
            return (
                "",
                "ERROR: Upload a Research Paper PDF.",
                "",
                "",
                {"error": "missing paper"},
                None,
                nofo_state,
                paper_state,
                paper_path_state,
            )

        topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
    except (ValueError, RuntimeError) as exc:
        # These carry user-facing messages (missing key, scanned PDF, missing
        # SDK); gr.Error makes the message visible instead of a generic error.
        raise gr.Error(str(exc)) from exc

    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state


def _reset_ui():
    """Return cleared values for every input, output, and state component."""
    return ("", DEFAULT_MODEL, None, None, "", "", "", "", {}, None, "", "", "")


with gr.Blocks(title="NOFO ↔ Paper Comparator (5-Task Pipeline)") as demo:
    gr.Markdown("# NOFO ↔ Paper Comparator (5-Task Pipeline)")

    # Cached extraction results, so a re-run does not need a re-upload.
    nofo_text_state = gr.State("")
    paper_text_state = gr.State("")
    paper_path_state = gr.State("")

    with gr.Row():
        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)

    with gr.Row():
        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])

    run_btn = gr.Button("Run Analysis", variant="primary")
    clear_btn = gr.Button("Clear")

    gr.Markdown("## Results")
    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
    task4_out = gr.Markdown(label="Task 4: Full Proposal")
    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
    download_out = gr.File(label="Download Results (JSON)")

    run_btn.click(
        fn=run_analysis,
        inputs=[
            api_key,
            model,
            nofo_pdf,
            paper_pdf,
            nofo_text_state,
            paper_text_state,
            paper_path_state,
        ],
        outputs=[
            task1_out,
            task2_out,
            task3_out,
            task4_out,
            task5_out,
            download_out,
            nofo_text_state,
            paper_text_state,
            paper_path_state,
        ],
    )

    clear_btn.click(
        fn=_reset_ui,
        inputs=[],
        outputs=[
            api_key,
            model,
            nofo_pdf,
            paper_pdf,
            task1_out,
            task2_out,
            task3_out,
            task4_out,
            task5_out,
            download_out,
            nofo_text_state,
            paper_text_state,
            paper_path_state,
        ],
    )

if __name__ == "__main__":
    demo.launch()