# Hugging Face Space: eaglelandsonce/Research_Compare (status: Sleeping)
# File size: 14,317 bytes; revision 09756c4

import os
import json
import tempfile
from typing import Any, Dict, Tuple

import gradio as gr
from pypdf import PdfReader

# Optional dependency: if importing the SDK fails for any reason, OpenAI is set
# to None and openai_chat() reports the missing SDK when it is called.
try:
    from openai import OpenAI
except Exception:
    OpenAI = None  # handled at runtime

# Default value of the "Model" textbox; read from the OPENAI_MODEL environment
# variable, falling back to "gpt-4o-mini" when it is not set.
DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")

# Task 1 prompt: asks the model for the single primary funding topic of a NOFO,
# returned as the topic name only. run_pipeline() appends the NOFO text to it.
PROMPT_TASK_1 = """You are an expert research analyst.

You will be given the full text of a Notice of Funding Opportunity (NOFO).

Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.

Instructions:
- Read the NOFO carefully.
- Determine the single, highest-level topic that best represents the purpose of the funding.
- Respond with the topic name ONLY.
- Do NOT include explanations, descriptions, bullet points, or additional text.
- Do NOT include punctuation before or after the topic.

Output Format:
<topic name only>
"""

# Task 2 prompt: relevance check of the research paper against the NOFO topic.
# run_pipeline() searches the reply for the literal sentinel
# "PAPER NOT RELATED TO TOPIC" to decide whether Tasks 3-5 are skipped.
# NOTE(review): the "Output Format" heading appears twice at the end of this
# prompt, and the `"summary": ...` template asks for a key/value wrapper while
# the instructions ask for the bare sentinel — confirm which shape is intended.
PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.

You will be provided with:
1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper

Your task is to determine whether the research paper is RELEVANT to the NOFO topic.

Relevance Criteria:
- Domain alignment (field, application area, or population)
- Methodological alignment (approaches, models, techniques)
- Theoretical or conceptual alignment
- Practical applicability to the NOFO’s funding objectives

Instructions:
- Carefully analyze the research paper in relation to the NOFO topic.
- If the paper does NOT meaningfully align by domain, method, theory, or application:
  → Respond with exactly:
    PAPER NOT RELATED TO TOPIC
- If the paper IS relevant:
  → Provide a concise summary of the paper in under 300 words.
  → Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.

Output Format:
### Output Format:
"summary": "<summary of the paper under 300 words OR PAPER NOT RELATED TO TOPIC>"
"""

# Task 3 prompt: asks for exactly five project ideas separated by "---" lines.
# run_pipeline() splits the reply on that separator to pick the idea that is
# passed on to Task 4, and appends the topic, paper text, and paper path.
PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.

You will be provided with:
1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper that has been determined to be RELEVANT to the NOFO

Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.

Instructions:
- Generate EXACTLY five (5) distinct research project ideas.
- Each idea must clearly align with the NOFO topic and objectives.
- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.

For EACH idea, include the following fields in the exact order shown:
1. Title – a concise project title
2. Description – a clear explanation of the project idea (3–5 sentences)
3. Citation – author(s) and year of the research paper
4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
5. Research Paper Path – the file path or identifier for the paper used

Formatting Requirements:
- Separate each project idea using exactly three dashes:
---
- Do NOT add numbering or bullet points.
- Do NOT include any text before the first idea or after the last idea.
- Maintain consistent formatting across all five ideas.

Output Format (repeat exactly five times):

Title:
Description:
Citation:
NOFO Alignment:
Research Paper Path:
"""

# Task 4 prompt: asks for a complete proposal built on one selected idea.
# run_pipeline() appends the NOFO topic, the selected idea, the paper text, and
# the NOFO text after this prompt.
PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO) topic and objectives
2. ONE selected research project idea generated in Task 3
3. The research paper that supports this idea

Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.

Instructions:
- Write the proposal in a formal grant-writing style.
- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
- Clearly show how the proposal builds upon and extends the provided research paper.
- Assume the audience is a technical and scientific review panel.

Required Proposal Sections (use these exact headings):

1. Project Title
2. Abstract (250–300 words)
3. Background and Significance
   - Problem statement
   - Prior work and gaps
   - Relevance to the NOFO topic
4. Objectives and Specific Aims
5. Research Methodology
   - Study design
   - Data sources
   - Models, tools, or interventions
6. Innovation
   - What is novel compared to existing work
7. Expected Outcomes and Impact
   - Scientific, societal, or clinical impact
8. Evaluation and Validation Plan
   - Metrics, benchmarks, or success criteria
9. Timeline and Milestones
   - Phased plan over the project duration
10. Risk Management and Mitigation
11. Alignment with NOFO Priorities
   - Explicit mapping to funding objectives
12. References
   - Include the provided research paper

Constraints:
- Keep the proposal concise but complete.
- Avoid generic filler language.
- Ensure internal consistency across sections.
- Do NOT include budget or personnel details unless explicitly stated in the NOFO.

Output Requirements:
- Use clear section headings exactly as listed.
- Do NOT include commentary, analysis, or explanations outside the proposal text.
- Return the proposal text ONLY.
"""

# Task 5 prompt: peer-review scoring of the proposal, requested as JSON only.
# run_pipeline() parses the reply with json.loads. This is a plain (non-f)
# string, so the braces in the JSON template below are literal text.
PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO)
2. A complete research proposal written in response to the NOFO

Your task is to critically evaluate the proposal using standard peer-review criteria.

Evaluation Criteria:
Evaluate the proposal on the following four dimensions:

1. Innovation
2. Significance
3. Approach
4. Investigator Expertise

Scoring Instructions:
- Assign a numeric score from 1 to 5 for EACH criterion:
  1 = Poor
  2 = Fair
  3 = Good
  4 = Very Good
  5 = Excellent
- Scores must reflect how competitive the proposal would be in a real funding review.
- Be objective, specific, and evidence-based.

For EACH criterion, provide:
- Score (integer 1–5)
- Justification (3–5 sentences)
- Key Strengths (bullet list)
- Key Weaknesses (bullet list)
- Actionable Recommendations for Improvement

Output Format:
Return VALID JSON only. Do NOT include markdown, explanations, or additional text.

Use the following JSON structure exactly:

{
  "Innovation": {
    "score": <1-5>,
    "justification": "<text>",
    "strengths": ["<item>", "<item>"],
    "weaknesses": ["<item>", "<item>"],
    "recommendations": ["<item>", "<item>"]
  },
  "Significance": {
    "score": <1-5>,
    "justification": "<text>",
    "strengths": ["<item>", "<item>"],
    "weaknesses": ["<item>", "<item>"],
    "recommendations": ["<item>", "<item>"]
  },
  "Approach": {
    "score": <1-5>,
    "justification": "<text>",
    "strengths": ["<item>", "<item>"],
    "weaknesses": ["<item>", "<item>"],
    "recommendations": ["<item>", "<item>"]
  },
  "Investigator Expertise": {
    "score": <1-5>,
    "justification": "<text>",
    "strengths": ["<item>", "<item>"],
    "weaknesses": ["<item>", "<item>"],
    "recommendations": ["<item>", "<item>"]
  }
}
"""

# System message passed to every openai_chat() call made by run_pipeline().
SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."


def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the combined text of all pages of the PDF at *pdf_path*.

    Pages whose extraction raises or yields only whitespace are skipped; the
    remaining page texts are joined with a blank line between them.

    Raises:
        ValueError: if fewer than 600 characters of text were extracted.
    """

    def _page_text(page) -> str:
        # Best effort: a page that cannot be extracted contributes no text.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    page_texts = (_page_text(page) for page in PdfReader(pdf_path).pages)
    combined = "\n\n".join(chunk for chunk in page_texts if chunk.strip()).strip()
    if len(combined) >= 600:
        return combined
    raise ValueError(
        "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
    )


def openai_chat(api_key: str, system: str, user: str, model: str) -> str:
    """Send one system + user message pair to the chat completions API.

    Args:
        api_key: OpenAI API key; surrounding whitespace is ignored.
        system: Content of the system message.
        user: Content of the user message.
        model: Name of the chat model to call.

    Returns:
        The first choice's message content, stripped ("" when it is empty).

    Raises:
        RuntimeError: if the OpenAI SDK could not be imported.
        ValueError: if *api_key* is missing or blank.
    """
    if OpenAI is None:
        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")

    cleaned_key = api_key.strip() if api_key else ""
    if not cleaned_key:
        raise ValueError("Please provide an OpenAI API key.")

    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = OpenAI(api_key=cleaned_key).chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.2,
    )
    reply = completion.choices[0].message.content
    return reply.strip() if reply else ""


def _first_idea(ideas_text: str) -> str:
    """Return the first non-empty idea from the Task 3 output."""
    separator = "\n---\n" if "\n---\n" in ideas_text else "---"
    for segment in ideas_text.split(separator):
        cleaned = segment.strip()
        # A reply that starts with a separator yields an empty first segment;
        # skip it instead of sending an empty idea to Task 4.
        if cleaned:
            return cleaned
    return ideas_text.strip()


def _parse_review_json(raw_review: str) -> Tuple[Any, str]:
    """Parse the Task 5 reply; return (value for the JSON viewer, raw text to store)."""
    try:
        return json.loads(raw_review), raw_review
    except (ValueError, RecursionError):
        pass

    # The model may wrap the JSON in extra text; retry on the outermost braces.
    start, end = raw_review.find("{"), raw_review.rfind("}")
    if start != -1 and end > start:
        candidate = raw_review[start : end + 1]
        try:
            return json.loads(candidate), candidate
        except (ValueError, RecursionError):
            pass
    return {"error": "Invalid JSON returned by model", "raw": raw_review}, raw_review


def run_pipeline(
    api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str
) -> Tuple[str, str, str, str, Any, str]:
    """Run the five-task NOFO/paper pipeline and write the results to a JSON file.

    Task 1 extracts the NOFO topic and Task 2 checks the paper's relevance.
    When the Task 2 reply contains "PAPER NOT RELATED TO TOPIC", Tasks 3-5 are
    skipped; otherwise Task 3 generates ideas, Task 4 writes a proposal for the
    first idea, and Task 5 reviews that proposal.

    Args:
        api_key: OpenAI API key forwarded to openai_chat().
        model: Chat model name forwarded to openai_chat().
        nofo_text: Full text of the NOFO.
        paper_text: Full text of the research paper.
        paper_path: Path/identifier of the paper; "uploaded_paper.pdf" is used
            when it is empty.

    Returns:
        (topic, task 2 reply, task 3 reply, task 4 reply, task 5 value for the
        JSON viewer, path of the temporary JSON results file).

    Raises:
        Whatever openai_chat() raises (e.g. ValueError for a blank API key).
    """
    skipped = "SKIPPED - PAPER NOT RELATED TO TOPIC"

    # Task 1: topic, collapsed onto a single line.
    topic = openai_chat(api_key, SYSTEM_BASE, f"{PROMPT_TASK_1}\n\nNOFO TEXT:\n\n{nofo_text}", model)
    topic = " ".join(topic.split())

    # Task 2: relevance check / summary.
    t2_out = openai_chat(
        api_key,
        SYSTEM_BASE,
        f"{PROMPT_TASK_2}\n\nNOFO TOPIC:\n{topic}\n\nRESEARCH PAPER TEXT:\n\n{paper_text}",
        model,
    ).strip()

    if "PAPER NOT RELATED TO TOPIC" in t2_out:
        t3_out = t4_out = t5_raw = skipped
        # The JSON viewer needs a dict/list or a valid JSON string, so wrap the
        # plain-text marker in a dict rather than passing it through as-is.
        t5_view: Any = {"status": skipped}
    else:
        # Task 3: project ideas.
        t3_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            f"{PROMPT_TASK_3}\n\nNOFO TOPIC:\n{topic}"
            f"\n\nRESEARCH PAPER TEXT:\n\n{paper_text}"
            f"\n\nRESEARCH PAPER PATH:\n{paper_path or 'uploaded_paper.pdf'}",
            model,
        ).strip()

        # Task 4: full proposal for the first idea.
        t4_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            f"{PROMPT_TASK_4}\n\nNOFO TOPIC:\n{topic}"
            f"\n\nSELECTED IDEA (FROM TASK 3):\n\n{_first_idea(t3_out)}"
            f"\n\nRESEARCH PAPER TEXT:\n\n{paper_text}"
            f"\n\nNOFO TEXT:\n\n{nofo_text}",
            model,
        ).strip()

        # Task 5: review scores, expected as JSON.
        t5_reply = openai_chat(
            api_key,
            SYSTEM_BASE,
            f"{PROMPT_TASK_5}\n\nNOFO TEXT:\n\n{nofo_text}\n\nPROPOSAL TEXT:\n\n{t4_out}",
            model,
        ).strip()
        t5_view, t5_raw = _parse_review_json(t5_reply)

    results: Dict[str, Any] = {
        "model": model,
        "task_1_topic": topic,
        "task_2_relevance_summary": t2_out,
        "task_3_project_ideas": t3_out,
        "task_4_full_proposal": t4_out,
        "task_5_review_scores_json": t5_raw,
    }

    # delete=False keeps the file on disk so it can be offered as a download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix="nofo_paper_results_",
        suffix=".json",
        delete=False,
    ) as handle:
        json.dump(results, handle, ensure_ascii=False, indent=2)
        out_path = handle.name

    return topic, t2_out, t3_out, t4_out, t5_view, out_path


def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
    """Gradio click handler: refresh cached PDF text, then run the pipeline.

    Args:
        api_key: OpenAI API key from the UI.
        model: Chat model name from the UI.
        nofo_file: Uploaded NOFO PDF (object exposing `.name`, or a path
            string), or None to reuse the cached text.
        paper_file: Uploaded research paper PDF, same conventions.
        nofo_state: Cached NOFO text from a previous run.
        paper_state: Cached paper text from a previous run.
        paper_path_state: Cached paper path from a previous run.

    Returns:
        A 9-tuple: the five task outputs, the download path (or None), and the
        three updated state values. On a missing input, or when extraction or
        the pipeline raises ValueError/RuntimeError, the second element holds
        an "ERROR: ..." message and the fifth an {"error": ...} dict.
    """

    def _path_of(upload):
        # Accept both file-like uploads exposing `.name` and plain path strings.
        return getattr(upload, "name", upload)

    def _failure(message, code):
        # Same output shape as a successful run, with the states passed through.
        return "", f"ERROR: {message}", "", "", {"error": code}, None, nofo_state, paper_state, paper_path_state

    # Guard against None so the .strip() checks below cannot fail.
    nofo_state = nofo_state or ""
    paper_state = paper_state or ""
    paper_path_state = paper_path_state or ""

    try:
        # Update cached texts if new files are uploaded
        if nofo_file is not None:
            nofo_state = extract_text_from_pdf(_path_of(nofo_file))
        if paper_file is not None:
            uploaded_path = _path_of(paper_file)
            paper_state = extract_text_from_pdf(uploaded_path)
            paper_path_state = uploaded_path

        if not nofo_state.strip():
            return _failure("Upload a NOFO PDF.", "missing NOFO")
        if not paper_state.strip():
            return _failure("Upload a Research Paper PDF.", "missing paper")

        topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
    except (ValueError, RuntimeError) as exc:
        # These carry user-facing messages (unreadable PDF, blank API key,
        # missing SDK); show them in the UI like the other input errors.
        return _failure(str(exc), str(exc))

    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state


with gr.Blocks(title="NOFO ↔ Paper Comparator (5-Task Pipeline)") as demo:
    gr.Markdown("# NOFO ↔ Paper Comparator (5-Task Pipeline)")

    # State holders for the cached NOFO text, paper text, and paper path.
    nofo_text_state = gr.State("")
    paper_text_state = gr.State("")
    paper_path_state = gr.State("")

    # Credentials and model selection.
    with gr.Row():
        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)

    # PDF uploads.
    with gr.Row():
        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])

    run_btn = gr.Button("Run Analysis", variant="primary")
    clear_btn = gr.Button("Clear")

    # One output component per pipeline task, plus the results download.
    gr.Markdown("## Results")
    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
    task4_out = gr.Markdown(label="Task 4: Full Proposal")
    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
    download_out = gr.File(label="Download Results (JSON)")

    # Component groups shared by the two click handlers below.
    cached_states = [nofo_text_state, paper_text_state, paper_path_state]
    form_inputs = [api_key, model, nofo_pdf, paper_pdf]
    result_views = [task1_out, task2_out, task3_out, task4_out, task5_out, download_out]

    def _reset_ui():
        """Return blank values for the form, the result views, and the states."""
        return ("", DEFAULT_MODEL, None, None, "", "", "", "", {}, None, "", "", "")

    run_btn.click(
        fn=run_analysis,
        inputs=form_inputs + cached_states,
        outputs=result_views + cached_states,
    )

    clear_btn.click(
        fn=_reset_ui,
        inputs=[],
        outputs=form_inputs + result_views + cached_states,
    )

if __name__ == "__main__":
    demo.launch()