Spaces:

eaglelandsonce
/

Research_Compare

Sleeping

App Files Files Community

eaglelandsonce committed on Jan 11

Commit

09756c4

verified ·

1 Parent(s): 9899b40

Create app.py

Browse files

Files changed (1) hide show

app.py +410 -0

app.py ADDED Viewed

	@@ -0,0 +1,410 @@

+import os
+import json
+import tempfile
+from typing import Any, Dict, Tuple
+import gradio as gr
+from pypdf import PdfReader
+try:
+    from openai import OpenAI
+except Exception:
+    OpenAI = None  # handled at runtime
+# Default chat model name; the OPENAI_MODEL environment variable overrides "gpt-4o-mini".
+DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
+# Task 1 prompt: asks the model to reply with the NOFO's primary funding topic, name only.
+# The NOFO text is appended to this prompt by the caller.
+PROMPT_TASK_1 = """You are an expert research analyst.
+You will be given the full text of a Notice of Funding Opportunity (NOFO).
+Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.
+Instructions:
+- Read the NOFO carefully.
+- Determine the single, highest-level topic that best represents the purpose of the funding.
+- Respond with the topic name ONLY.
+- Do NOT include explanations, descriptions, bullet points, or additional text.
+- Do NOT include punctuation before or after the topic.
+Output Format:
+<topic name only>
+"""
+# Task 2 prompt: asks whether the paper is relevant to the topic and, if so, for a
+# summary under 300 words; otherwise for the sentinel "PAPER NOT RELATED TO TOPIC".
+# NOTE(review): the "Output Format" heading appears twice, and the format line wraps
+# the answer in a "summary": field while the instructions ask for the sentinel
+# "exactly" -- confirm which form is intended before editing the prompt text.
+PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.
+You will be provided with:
+1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
+2. The full text of a research paper
+Your task is to determine whether the research paper is RELEVANT to the NOFO topic.
+Relevance Criteria:
+- Domain alignment (field, application area, or population)
+- Methodological alignment (approaches, models, techniques)
+- Theoretical or conceptual alignment
+- Practical applicability to the NOFO’s funding objectives
+Instructions:
+- Carefully analyze the research paper in relation to the NOFO topic.
+- If the paper does NOT meaningfully align by domain, method, theory, or application:
+  → Respond with exactly:
+    PAPER NOT RELATED TO TOPIC
+- If the paper IS relevant:
+  → Provide a concise summary of the paper in under 300 words.
+  → Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.
+Output Format:
+### Output Format:
+"summary": "<summary of the paper under 300 words OR PAPER NOT RELATED TO TOPIC>"
+"""
+# Task 3 prompt: requests exactly five project ideas, each with five labelled fields,
+# separated by lines of three dashes ("---").
+PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.
+You will be provided with:
+1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
+2. The full text of a research paper that has been determined to be RELEVANT to the NOFO
+Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.
+Instructions:
+- Generate EXACTLY five (5) distinct research project ideas.
+- Each idea must clearly align with the NOFO topic and objectives.
+- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
+- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.
+For EACH idea, include the following fields in the exact order shown:
+1. Title – a concise project title
+2. Description – a clear explanation of the project idea (3–5 sentences)
+3. Citation – author(s) and year of the research paper
+4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
+5. Research Paper Path – the file path or identifier for the paper used
+Formatting Requirements:
+- Separate each project idea using exactly three dashes:
+---
+- Do NOT add numbering or bullet points.
+- Do NOT include any text before the first idea or after the last idea.
+- Maintain consistent formatting across all five ideas.
+Output Format (repeat exactly five times):
+Title:
+Description:
+Citation:
+NOFO Alignment:
+Research Paper Path:
+"""
+# Task 4 prompt: asks for a complete grant proposal, using twelve fixed section
+# headings, built from one selected Task 3 idea and the supporting paper.
+PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.
+You will be provided with:
+1. The Notice of Funding Opportunity (NOFO) topic and objectives
+2. ONE selected research project idea generated in Task 3
+3. The research paper that supports this idea
+Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.
+Instructions:
+- Write the proposal in a formal grant-writing style.
+- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
+- Clearly show how the proposal builds upon and extends the provided research paper.
+- Assume the audience is a technical and scientific review panel.
+Required Proposal Sections (use these exact headings):
+1. Project Title
+2. Abstract (250–300 words)
+3. Background and Significance
+   - Problem statement
+   - Prior work and gaps
+   - Relevance to the NOFO topic
+4. Objectives and Specific Aims
+5. Research Methodology
+   - Study design
+   - Data sources
+   - Models, tools, or interventions
+6. Innovation
+   - What is novel compared to existing work
+7. Expected Outcomes and Impact
+   - Scientific, societal, or clinical impact
+8. Evaluation and Validation Plan
+   - Metrics, benchmarks, or success criteria
+9. Timeline and Milestones
+   - Phased plan over the project duration
+10. Risk Management and Mitigation
+11. Alignment with NOFO Priorities
+   - Explicit mapping to funding objectives
+12. References
+   - Include the provided research paper
+Constraints:
+- Keep the proposal concise but complete.
+- Avoid generic filler language.
+- Ensure internal consistency across sections.
+- Do NOT include budget or personnel details unless explicitly stated in the NOFO.
+Output Requirements:
+- Use clear section headings exactly as listed.
+- Do NOT include commentary, analysis, or explanations outside the proposal text.
+- Return the proposal text ONLY.
+"""
+# Task 5 prompt: asks for a JSON-only peer review that scores the proposal from 1 to 5
+# on Innovation, Significance, Approach and Investigator Expertise.
+PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.
+You will be provided with:
+1. The Notice of Funding Opportunity (NOFO)
+2. A complete research proposal written in response to the NOFO
+Your task is to critically evaluate the proposal using standard peer-review criteria.
+Evaluation Criteria:
+Evaluate the proposal on the following four dimensions:
+1. Innovation
+2. Significance
+3. Approach
+4. Investigator Expertise
+Scoring Instructions:
+- Assign a numeric score from 1 to 5 for EACH criterion:
+  1 = Poor
+  2 = Fair
+  3 = Good
+  4 = Very Good
+  5 = Excellent
+- Scores must reflect how competitive the proposal would be in a real funding review.
+- Be objective, specific, and evidence-based.
+For EACH criterion, provide:
+- Score (integer 1–5)
+- Justification (3–5 sentences)
+- Key Strengths (bullet list)
+- Key Weaknesses (bullet list)
+- Actionable Recommendations for Improvement
+Output Format:
+Return VALID JSON only. Do NOT include markdown, explanations, or additional text.
+Use the following JSON structure exactly:
+{
+  "Innovation": {
+    "score": <1-5>,
+    "justification": "<text>",
+    "strengths": ["<item>", "<item>"],
+    "weaknesses": ["<item>", "<item>"],
+    "recommendations": ["<item>", "<item>"]
+  },
+  "Significance": {
+    "score": <1-5>,
+    "justification": "<text>",
+    "strengths": ["<item>", "<item>"],
+    "weaknesses": ["<item>", "<item>"],
+    "recommendations": ["<item>", "<item>"]
+  },
+  "Approach": {
+    "score": <1-5>,
+    "justification": "<text>",
+    "strengths": ["<item>", "<item>"],
+    "weaknesses": ["<item>", "<item>"],
+    "recommendations": ["<item>", "<item>"]
+  },
+  "Investigator Expertise": {
+    "score": <1-5>,
+    "justification": "<text>",
+    "strengths": ["<item>", "<item>"],
+    "weaknesses": ["<item>", "<item>"],
+    "recommendations": ["<item>", "<item>"]
+  }
+}
+"""
+# System message text; the pipeline passes it as the system role of its chat calls.
+SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."
+def extract_text_from_pdf(pdf_path: str) -> str:
+    reader = PdfReader(pdf_path)
+    parts = []
+    for page in reader.pages:
+        try:
+            txt = page.extract_text() or ""
+        except Exception:
+            txt = ""
+        if txt.strip():
+            parts.append(txt)
+    text = "\n\n".join(parts).strip()
+    if len(text) < 600:
+        raise ValueError(
+            "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
+        )
+    return text
+def openai_chat(api_key: str, system: str, user: str, model: str) -> str:
+    if OpenAI is None:
+        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")
+    if not api_key or not api_key.strip():
+        raise ValueError("Please provide an OpenAI API key.")
+    client = OpenAI(api_key=api_key.strip())
+    resp = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": system},
+            {"role": "user", "content": user},
+        ],
+        temperature=0.2,
+    )
+    return (resp.choices[0].message.content or "").strip()
+def run_pipeline(api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str):
+    # Task 1
+    topic = openai_chat(api_key, SYSTEM_BASE, PROMPT_TASK_1 + "\n\nNOFO TEXT:\n\n" + nofo_text, model)
+    topic = " ".join(topic.split()).strip()
+    # Task 2
+    t2_out = openai_chat(
+        api_key,
+        SYSTEM_BASE,
+        PROMPT_TASK_2 + "\n\nNOFO TOPIC:\n" + topic + "\n\nRESEARCH PAPER TEXT:\n\n" + paper_text,
+        model,
+    ).strip()
+    not_related = (t2_out.strip() == "PAPER NOT RELATED TO TOPIC") or ("PAPER NOT RELATED TO TOPIC" in t2_out)
+    if not_related:
+        t3_out = "SKIPPED - PAPER NOT RELATED TO TOPIC"
+        t4_out = "SKIPPED - PAPER NOT RELATED TO TOPIC"
+        t5_raw = "SKIPPED - PAPER NOT RELATED TO TOPIC"
+        t5_view = t5_raw
+    else:
+        # Task 3
+        t3_out = openai_chat(
+            api_key,
+            SYSTEM_BASE,
+            PROMPT_TASK_3
+            + "\n\nNOFO TOPIC:\n"
+            + topic
+            + "\n\nRESEARCH PAPER TEXT:\n\n"
+            + paper_text
+            + "\n\nRESEARCH PAPER PATH:\n"
+            + (paper_path or "uploaded_paper.pdf"),
+            model,
+        ).strip()
+        # First idea for Task 4
+        first_idea = t3_out.split("\n---\n")[0].strip() if "\n---\n" in t3_out else t3_out.split("---")[0].strip()
+        # Task 4
+        t4_out = openai_chat(
+            api_key,
+            SYSTEM_BASE,
+            PROMPT_TASK_4
+            + "\n\nNOFO TOPIC:\n"
+            + topic
+            + "\n\nSELECTED IDEA (FROM TASK 3):\n\n"
+            + first_idea
+            + "\n\nRESEARCH PAPER TEXT:\n\n"
+            + paper_text
+            + "\n\nNOFO TEXT:\n\n"
+            + nofo_text,
+            model,
+        ).strip()
+        # Task 5
+        t5_raw = openai_chat(
+            api_key,
+            SYSTEM_BASE,
+            PROMPT_TASK_5 + "\n\nNOFO TEXT:\n\n" + nofo_text + "\n\nPROPOSAL TEXT:\n\n" + t4_out,
+            model,
+        ).strip()
+        # JSON viewer parsing
+        try:
+            t5_view = json.loads(t5_raw)
+        except Exception:
+            start, end = t5_raw.find("{"), t5_raw.rfind("}")
+            if start != -1 and end != -1 and end > start:
+                candidate = t5_raw[start : end + 1]
+                try:
+                    t5_view = json.loads(candidate)
+                    t5_raw = candidate
+                except Exception:
+                    t5_view = {"error": "Invalid JSON returned by model", "raw": t5_raw}
+            else:
+                t5_view = {"error": "Invalid JSON returned by model", "raw": t5_raw}
+    results: Dict[str, Any] = {
+        "model": model,
+        "task_1_topic": topic,
+        "task_2_relevance_summary": t2_out,
+        "task_3_project_ideas": t3_out,
+        "task_4_full_proposal": t4_out,
+        "task_5_review_scores_json": t5_raw,
+    }
+    fd, out_path = tempfile.mkstemp(prefix="nofo_paper_results_", suffix=".json")
+    os.close(fd)
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(results, f, ensure_ascii=False, indent=2)
+    return topic, t2_out, t3_out, t4_out, t5_view, out_path
+def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
+    # Update cached texts if new files are uploaded
+    if nofo_file is not None:
+        nofo_state = extract_text_from_pdf(nofo_file.name)
+    if paper_file is not None:
+        paper_state = extract_text_from_pdf(paper_file.name)
+        paper_path_state = paper_file.name
+    if not nofo_state.strip():
+        return "", "ERROR: Upload a NOFO PDF.", "", "", {"error": "missing NOFO"}, None, nofo_state, paper_state, paper_path_state
+    if not paper_state.strip():
+        return "", "ERROR: Upload a Research Paper PDF.", "", "", {"error": "missing paper"}, None, nofo_state, paper_state, paper_path_state
+    topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
+    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state
+with gr.Blocks(title="NOFO ↔ Paper Comparator (5-Task Pipeline)") as demo:
+    gr.Markdown("# NOFO ↔ Paper Comparator (5-Task Pipeline)")
+    nofo_text_state = gr.State("")
+    paper_text_state = gr.State("")
+    paper_path_state = gr.State("")
+    with gr.Row():
+        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
+        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)
+    with gr.Row():
+        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
+        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])
+    run_btn = gr.Button("Run Analysis", variant="primary")
+    clear_btn = gr.Button("Clear")
+    gr.Markdown("## Results")
+    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
+    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
+    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
+    task4_out = gr.Markdown(label="Task 4: Full Proposal")
+    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
+    download_out = gr.File(label="Download Results (JSON)")
+    run_btn.click(
+        fn=run_analysis,
+        inputs=[api_key, model, nofo_pdf, paper_pdf, nofo_text_state, paper_text_state, paper_path_state],
+        outputs=[task1_out, task2_out, task3_out, task4_out, task5_out, download_out, nofo_text_state, paper_text_state, paper_path_state],
+    )
+    clear_btn.click(
+        fn=lambda: ("", DEFAULT_MODEL, None, None, "", "", "", "", {}, None, "", "", ""),
+        inputs=[],
+        outputs=[api_key, model, nofo_pdf, paper_pdf, task1_out, task2_out, task3_out, task4_out, task5_out, download_out, nofo_text_state, paper_text_state, paper_path_state],
+    )
+if __name__ == "__main__":
+    demo.launch()