eaglelandsonce committed on
Commit
09756c4
·
verified ·
1 Parent(s): 9899b40

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +410 -0
app.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ from typing import Any, Dict, Tuple
5
+
6
+ import gradio as gr
7
+ from pypdf import PdfReader
8
+
9
+ try:
10
+ from openai import OpenAI
11
+ except Exception:
12
+ OpenAI = None # handled at runtime
13
+
14
# Initial value of the UI's "Model" textbox; the OPENAI_MODEL environment
# variable, when set, replaces the built-in fallback.
DEFAULT_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
15
+
16
# Task 1 instructions: ask the model for the single primary funding topic of
# a NOFO. run_pipeline appends the NOFO text after this prompt.
PROMPT_TASK_1 = """You are an expert research analyst.

You will be given the full text of a Notice of Funding Opportunity (NOFO).

Your task is to identify and extract the PRIMARY topic or focus area for which funding is being provided.

Instructions:
- Read the NOFO carefully.
- Determine the single, highest-level topic that best represents the purpose of the funding.
- Respond with the topic name ONLY.
- Do NOT include explanations, descriptions, bullet points, or additional text.
- Do NOT include punctuation before or after the topic.

Output Format:
<topic name only>
"""
32
+
33
# Task 2 instructions: decide whether the paper is relevant to the NOFO topic
# and, if so, summarize it. run_pipeline appends the topic and the paper text
# and looks for the literal marker "PAPER NOT RELATED TO TOPIC" in the reply.
# NOTE(review): leading indentation inside this literal was not visible in
# the reviewed copy - confirm against the repository file.
PROMPT_TASK_2 = """You are an expert research reviewer and grant analyst.

You will be provided with:
1. The extracted funding topic from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper

Your task is to determine whether the research paper is RELEVANT to the NOFO topic.

Relevance Criteria:
- Domain alignment (field, application area, or population)
- Methodological alignment (approaches, models, techniques)
- Theoretical or conceptual alignment
- Practical applicability to the NOFO’s funding objectives

Instructions:
- Carefully analyze the research paper in relation to the NOFO topic.
- If the paper does NOT meaningfully align by domain, method, theory, or application:
→ Respond with exactly:
PAPER NOT RELATED TO TOPIC
- If the paper IS relevant:
→ Provide a concise summary of the paper in under 300 words.
→ Emphasize how the paper supports, informs, or could enable work aligned with the NOFO topic.

Output Format:
"summary": "<summary of the paper under 300 words OR PAPER NOT RELATED TO TOPIC>"
"""
60
+
61
# Task 3 instructions: request exactly five project ideas separated by "---".
# run_pipeline appends the topic, the paper text and the paper path, and later
# splits the reply on that separator to pick the first idea.
PROMPT_TASK_3 = """You are an expert research strategist and grant proposal architect.

You will be provided with:
1. The funding topic extracted from a Notice of Funding Opportunity (NOFO)
2. The full text of a research paper that has been determined to be RELEVANT to the NOFO

Your task is to generate research project ideas that could form the basis of a competitive proposal under this NOFO.

Instructions:
- Generate EXACTLY five (5) distinct research project ideas.
- Each idea must clearly align with the NOFO topic and objectives.
- Each idea must explicitly leverage insights, methods, or findings from the provided research paper.
- Each idea must be realistic, fundable, and suitable for an academic or applied research proposal.

For EACH idea, include the following fields in the exact order shown:
1. Title – a concise project title
2. Description – a clear explanation of the project idea (3–5 sentences)
3. Citation – author(s) and year of the research paper
4. NOFO Alignment – at least two specific ways the project aligns with the NOFO goals or requirements
5. Research Paper Path – the file path or identifier for the paper used

Formatting Requirements:
- Separate each project idea using exactly three dashes:
---
- Do NOT add numbering or bullet points.
- Do NOT include any text before the first idea or after the last idea.
- Maintain consistent formatting across all five ideas.

Output Format (repeat exactly five times):

Title:
Description:
Citation:
NOFO Alignment:
Research Paper Path:
"""
97
+
98
# Task 4 instructions: write a full proposal with twelve fixed section
# headings. run_pipeline appends the topic, the selected idea, the paper text
# and the NOFO text after this prompt.
# NOTE(review): leading indentation inside this literal (e.g. the sub-bullets
# under each section) was not visible in the reviewed copy - confirm against
# the repository file.
PROMPT_TASK_4 = """You are an expert principal investigator and grant proposal writer.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO) topic and objectives
2. ONE selected research project idea generated in Task 3
3. The research paper that supports this idea

Your task is to write a COMPLETE, professional research proposal that is fully aligned with the NOFO requirements.

Instructions:
- Write the proposal in a formal grant-writing style.
- Explicitly align all sections to the NOFO goals, priorities, and evaluation criteria.
- Clearly show how the proposal builds upon and extends the provided research paper.
- Assume the audience is a technical and scientific review panel.

Required Proposal Sections (use these exact headings):

1. Project Title
2. Abstract (250–300 words)
3. Background and Significance
- Problem statement
- Prior work and gaps
- Relevance to the NOFO topic
4. Objectives and Specific Aims
5. Research Methodology
- Study design
- Data sources
- Models, tools, or interventions
6. Innovation
- What is novel compared to existing work
7. Expected Outcomes and Impact
- Scientific, societal, or clinical impact
8. Evaluation and Validation Plan
- Metrics, benchmarks, or success criteria
9. Timeline and Milestones
- Phased plan over the project duration
10. Risk Management and Mitigation
11. Alignment with NOFO Priorities
- Explicit mapping to funding objectives
12. References
- Include the provided research paper

Constraints:
- Keep the proposal concise but complete.
- Avoid generic filler language.
- Ensure internal consistency across sections.
- Do NOT include budget or personnel details unless explicitly stated in the NOFO.

Output Requirements:
- Use clear section headings exactly as listed.
- Do NOT include commentary, analysis, or explanations outside the proposal text.
- Return the proposal text ONLY.
"""
151
+
152
# Task 5 instructions: score the proposal on four criteria and reply with JSON
# only. run_pipeline appends the NOFO text and the proposal text, then tries
# to parse the reply with json.loads.
# NOTE(review): leading indentation inside this literal (e.g. the JSON
# template) was not visible in the reviewed copy - confirm against the
# repository file.
PROMPT_TASK_5 = """You are an expert grant reviewer serving on a competitive scientific review panel.

You will be provided with:
1. The Notice of Funding Opportunity (NOFO)
2. A complete research proposal written in response to the NOFO

Your task is to critically evaluate the proposal using standard peer-review criteria.

Evaluation Criteria:
Evaluate the proposal on the following four dimensions:

1. Innovation
2. Significance
3. Approach
4. Investigator Expertise

Scoring Instructions:
- Assign a numeric score from 1 to 5 for EACH criterion:
1 = Poor
2 = Fair
3 = Good
4 = Very Good
5 = Excellent
- Scores must reflect how competitive the proposal would be in a real funding review.
- Be objective, specific, and evidence-based.

For EACH criterion, provide:
- Score (integer 1–5)
- Justification (3–5 sentences)
- Key Strengths (bullet list)
- Key Weaknesses (bullet list)
- Actionable Recommendations for Improvement

Output Format:
Return VALID JSON only. Do NOT include markdown, explanations, or additional text.

Use the following JSON structure exactly:

{
"Innovation": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Significance": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Approach": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
},
"Investigator Expertise": {
"score": <1-5>,
"justification": "<text>",
"strengths": ["<item>", "<item>"],
"weaknesses": ["<item>", "<item>"],
"recommendations": ["<item>", "<item>"]
}
}
"""
221
+
222
# System message passed to openai_chat for every task in run_pipeline.
SYSTEM_BASE = "You are precise, structured, and follow formatting rules exactly."
223
+
224
+
225
def extract_text_from_pdf(pdf_path: str, min_chars: int = 600) -> str:
    """Extract the selectable text of a PDF as a single string.

    Every page is read with ``page.extract_text()``. Pages whose extraction
    raises, or that yield only whitespace, are skipped; the remaining page
    texts are joined with a blank line between them.

    Args:
        pdf_path: Path of the PDF file handed to ``PdfReader``.
        min_chars: Minimum length the combined text must reach. Defaults to
            600, the threshold that used to be hard-coded here.

    Returns:
        The stripped, combined text of all pages that produced text.

    Raises:
        ValueError: If fewer than ``min_chars`` characters could be extracted.
    """
    reader = PdfReader(pdf_path)
    page_texts = []
    for page in reader.pages:
        try:
            page_text = page.extract_text() or ""
        except Exception:
            # Deliberate best effort: one unreadable page must not discard
            # the text of the pages that can be read.
            continue
        if page_text.strip():
            page_texts.append(page_text)

    text = "\n\n".join(page_texts).strip()
    if len(text) < min_chars:
        raise ValueError(
            "Insufficient extractable text. Please upload a text-based PDF (selectable text), not a scanned PDF."
        )
    return text
241
+
242
+
243
def openai_chat(api_key: str, system: str, user: str, model: str, temperature: float = 0.2) -> str:
    """Send one system + user message pair to the chat completions endpoint.

    Args:
        api_key: OpenAI API key; surrounding whitespace is removed before use.
        system: Content of the system message.
        user: Content of the user message.
        model: Model name passed through to the API.
        temperature: Sampling temperature passed through to the API. Defaults
            to 0.2, the value that used to be hard-coded here.

    Returns:
        The stripped content of the first returned choice, or an empty string
        when that content is missing.

    Raises:
        RuntimeError: If the ``openai`` package could not be imported.
        ValueError: If ``api_key`` is empty or only whitespace.
    """
    if OpenAI is None:
        raise RuntimeError("OpenAI SDK is not installed. Please install 'openai'.")
    if not api_key or not api_key.strip():
        raise ValueError("Please provide an OpenAI API key.")

    client = OpenAI(api_key=api_key.strip())
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=temperature,
    )
    content = response.choices[0].message.content
    return (content or "").strip()
258
+
259
+
260
_NOT_RELATED_MARKER = "PAPER NOT RELATED TO TOPIC"
_SKIPPED_MESSAGE = "SKIPPED - PAPER NOT RELATED TO TOPIC"


def _first_project_idea(ideas_text: str) -> str:
    """Return the first non-empty idea from the '---'-separated Task 3 reply."""
    separator = "\n---\n" if "\n---\n" in ideas_text else "---"
    for chunk in ideas_text.split(separator):
        # A reply that opens with the separator yields an empty or dash-led
        # first chunk; skip it instead of selecting an empty idea.
        idea = chunk.strip().lstrip("-").strip()
        if idea:
            return idea
    return ideas_text.strip()


def _parse_review_json(raw: str) -> Tuple[Any, str]:
    """Parse the Task 5 reply; return (value for the JSON viewer, raw text to store)."""
    try:
        return json.loads(raw), raw
    except (ValueError, RecursionError):
        pass

    # The reply may wrap the object in extra text; retry on the outermost
    # brace-delimited span.
    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        candidate = raw[start : end + 1]
        try:
            return json.loads(candidate), candidate
        except (ValueError, RecursionError):
            pass
    return {"error": "Invalid JSON returned by model", "raw": raw}, raw


def run_pipeline(
    api_key: str, model: str, nofo_text: str, paper_text: str, paper_path: str
) -> Tuple[str, str, str, str, Any, str]:
    """Run the five tasks in order and save their outputs to a JSON file.

    Task 1 extracts the NOFO topic and Task 2 judges the paper's relevance.
    When the Task 2 reply contains "PAPER NOT RELATED TO TOPIC", Tasks 3-5 are
    not run and are reported as skipped. Otherwise Task 3 generates project
    ideas, Task 4 writes a proposal for the first idea, and Task 5 reviews
    that proposal.

    Args:
        api_key: OpenAI API key forwarded to ``openai_chat``.
        model: Model name forwarded to ``openai_chat``.
        nofo_text: Text of the NOFO.
        paper_text: Text of the research paper.
        paper_path: Path or identifier of the paper shown to the model in
            Task 3; "uploaded_paper.pdf" is used when it is empty.

    Returns:
        ``(topic, task2, task3, task4, task5_view, results_path)``.
        ``task5_view`` is the parsed review JSON, or a dict describing why no
        review is available, so it can always be given to a JSON viewer.
        ``results_path`` is a temporary ``.json`` file holding all outputs.
    """
    # Task 1: collapse any whitespace/newlines in the reply to single spaces.
    topic = openai_chat(api_key, SYSTEM_BASE, PROMPT_TASK_1 + "\n\nNOFO TEXT:\n\n" + nofo_text, model)
    topic = " ".join(topic.split())

    # Task 2
    t2_out = openai_chat(
        api_key,
        SYSTEM_BASE,
        PROMPT_TASK_2 + "\n\nNOFO TOPIC:\n" + topic + "\n\nRESEARCH PAPER TEXT:\n\n" + paper_text,
        model,
    ).strip()

    # The marker may arrive bare or wrapped (e.g. inside a "summary" field),
    # so a substring test covers both cases.
    if _NOT_RELATED_MARKER in t2_out:
        t3_out = t4_out = t5_raw = _SKIPPED_MESSAGE
        # gr.JSON parses string values as JSON, so hand it a dict rather than
        # the plain skip message.
        t5_view: Any = {"status": _SKIPPED_MESSAGE}
    else:
        # Task 3
        t3_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_3
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nRESEARCH PAPER PATH:\n"
            + (paper_path or "uploaded_paper.pdf"),
            model,
        ).strip()

        # Task 4 expands only the first generated idea.
        first_idea = _first_project_idea(t3_out)
        t4_out = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_4
            + "\n\nNOFO TOPIC:\n"
            + topic
            + "\n\nSELECTED IDEA (FROM TASK 3):\n\n"
            + first_idea
            + "\n\nRESEARCH PAPER TEXT:\n\n"
            + paper_text
            + "\n\nNOFO TEXT:\n\n"
            + nofo_text,
            model,
        ).strip()

        # Task 5
        t5_raw = openai_chat(
            api_key,
            SYSTEM_BASE,
            PROMPT_TASK_5 + "\n\nNOFO TEXT:\n\n" + nofo_text + "\n\nPROPOSAL TEXT:\n\n" + t4_out,
            model,
        ).strip()
        t5_view, t5_raw = _parse_review_json(t5_raw)

    results: Dict[str, Any] = {
        "model": model,
        "task_1_topic": topic,
        "task_2_relevance_summary": t2_out,
        "task_3_project_ideas": t3_out,
        "task_4_full_proposal": t4_out,
        "task_5_review_scores_json": t5_raw,
    }

    # The file is intentionally left on disk: its path is returned so the UI
    # can offer it as a download.
    fd, out_path = tempfile.mkstemp(prefix="nofo_paper_results_", suffix=".json")
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    return topic, t2_out, t3_out, t4_out, t5_view, out_path
352
+
353
+
354
def run_analysis(api_key, model, nofo_file, paper_file, nofo_state, paper_state, paper_path_state):
    """Handle the "Run Analysis" click: refresh cached texts, then run the pipeline.

    Newly uploaded files replace the cached NOFO / paper text; when no file is
    supplied the previously cached text is reused. Expected failures (missing
    upload, unreadable PDF, missing API key, missing SDK) are reported as an
    "ERROR: ..." message in the second output instead of being raised.

    Args:
        api_key: OpenAI API key entered in the UI.
        model: Model name entered in the UI.
        nofo_file: Uploaded NOFO file object (its ``.name`` is read as the
            path), or None.
        paper_file: Uploaded paper file object (its ``.name`` is read as the
            path), or None.
        nofo_state: Cached NOFO text from an earlier run.
        paper_state: Cached paper text from an earlier run.
        paper_path_state: Cached paper path from an earlier run.

    Returns:
        A 9-tuple: the six values for the result components (topic, task 2,
        task 3, task 4, task 5 JSON, download path) followed by the updated
        ``nofo_state``, ``paper_state`` and ``paper_path_state``.
    """
    # Treat a missing cached value like an empty one so .strip() is safe.
    nofo_state = nofo_state or ""
    paper_state = paper_state or ""
    paper_path_state = paper_path_state or ""

    def _failure(message, code):
        # Reads the states at call time, so texts cached before the failure
        # are still handed back to the UI.
        return "", "ERROR: " + message, "", "", {"error": code}, None, nofo_state, paper_state, paper_path_state

    # Update cached texts if new files are uploaded.
    try:
        if nofo_file is not None:
            nofo_state = extract_text_from_pdf(nofo_file.name)
        if paper_file is not None:
            paper_state = extract_text_from_pdf(paper_file.name)
            paper_path_state = paper_file.name
    except ValueError as exc:
        # Raised by extract_text_from_pdf when too little text was found.
        return _failure(str(exc), "pdf extraction failed")

    if not nofo_state.strip():
        return _failure("Upload a NOFO PDF.", "missing NOFO")
    if not paper_state.strip():
        return _failure("Upload a Research Paper PDF.", "missing paper")

    try:
        topic, t2, t3, t4, t5, dl = run_pipeline(api_key, model, nofo_state, paper_state, paper_path_state)
    except (ValueError, RuntimeError) as exc:
        # openai_chat raises these for a missing API key / missing SDK.
        return _failure(str(exc), "analysis failed")
    return topic, t2, t3, t4, t5, dl, nofo_state, paper_state, paper_path_state
369
+
370
+
371
def _blank_form():
    """Return reset values for every input, result and cached-state component."""
    cleared_inputs = ("", DEFAULT_MODEL, None, None)
    cleared_results = ("", "", "", "", {}, None)
    cleared_states = ("", "", "")
    return cleared_inputs + cleared_results + cleared_states


# NOTE(review): the nesting below is reconstructed; the reviewed copy had lost
# its indentation. Only the two pairs that directly follow a `with gr.Row():`
# are placed inside a row - confirm against the repository file.
with gr.Blocks(title="NOFO ↔ Paper Comparator (5-Task Pipeline)") as demo:
    gr.Markdown("# NOFO ↔ Paper Comparator (5-Task Pipeline)")

    # Per-session caches so a second run can reuse already extracted text.
    nofo_text_state = gr.State("")
    paper_text_state = gr.State("")
    paper_path_state = gr.State("")

    with gr.Row():
        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        model = gr.Textbox(label="Model", value=DEFAULT_MODEL)

    with gr.Row():
        nofo_pdf = gr.File(label="NOFO PDF (drag & drop)", file_types=[".pdf"])
        paper_pdf = gr.File(label="Research Paper PDF (drag & drop)", file_types=[".pdf"])

    run_btn = gr.Button("Run Analysis", variant="primary")
    clear_btn = gr.Button("Clear")

    gr.Markdown("## Results")
    task1_out = gr.Textbox(label="Task 1: Topic", lines=2)
    task2_out = gr.Textbox(label="Task 2: Relevance Summary", lines=8)
    task3_out = gr.Textbox(label="Task 3: 5 Project Ideas", lines=14)
    task4_out = gr.Markdown(label="Task 4: Full Proposal")
    task5_out = gr.JSON(label="Task 5: Review Scores (JSON)")
    download_out = gr.File(label="Download Results (JSON)")

    # Component groups shared by both click handlers; the order matches the
    # tuples returned by run_analysis and _blank_form.
    form_inputs = [api_key, model, nofo_pdf, paper_pdf]
    result_components = [task1_out, task2_out, task3_out, task4_out, task5_out, download_out]
    cached_states = [nofo_text_state, paper_text_state, paper_path_state]

    run_btn.click(
        fn=run_analysis,
        inputs=form_inputs + cached_states,
        outputs=result_components + cached_states,
    )

    clear_btn.click(
        fn=_blank_form,
        inputs=[],
        outputs=form_inputs + result_components + cached_states,
    )

if __name__ == "__main__":
    demo.launch()