Update app.py
Browse files
app.py
CHANGED
|
@@ -1,77 +1,85 @@
|
|
| 1 |
-
import
|
| 2 |
import pdfplumber
|
| 3 |
-
import re
|
| 4 |
from transformers import pipeline
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 10 |
-
critic = pipeline("text2text-generation", model="google/flan-t5-large")
|
| 11 |
-
return summarizer, critic
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# PDF Text Extraction
|
| 16 |
def extract_text_from_pdf(pdf_file):
|
| 17 |
text = ""
|
| 18 |
with pdfplumber.open(pdf_file) as pdf:
|
| 19 |
for page in pdf.pages:
|
| 20 |
-
|
| 21 |
-
if
|
| 22 |
-
text +=
|
| 23 |
return text
|
| 24 |
|
| 25 |
-
#
|
| 26 |
def split_into_sections(text):
|
| 27 |
sections = {}
|
| 28 |
headings = ["abstract", "introduction", "methodology", "methods", "results", "discussion", "conclusion", "references"]
|
| 29 |
-
|
| 30 |
-
sections[
|
| 31 |
-
|
| 32 |
for line in text.splitlines():
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
sections[current_section] += line + "\n"
|
| 38 |
-
|
| 39 |
return sections
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
def
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
return critic(prompt, max_length=512, do_sample=False)[0]['generated_text']
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
with st.spinner("Reading and analyzing the paper..."):
|
| 61 |
-
full_text = extract_text_from_pdf(uploaded_file)
|
| 62 |
-
sections = split_into_sections(full_text)
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
|
| 69 |
-
for sec, content in sections.items():
|
| 70 |
-
if content.strip():
|
| 71 |
-
with st.expander(f"π {sec.capitalize()}"):
|
| 72 |
-
st.subheader("Critique")
|
| 73 |
-
st.write(critique_section(content[:1000])) # Limit input size
|
| 74 |
-
st.subheader("Research Gaps")
|
| 75 |
-
st.write(identify_research_gaps(content[:1000]))
|
| 76 |
|
| 77 |
-
st.success("Analysis complete β
")
|
|
|
|
| 1 |
+
import gradio as gr
import pdfplumber
from transformers import pipeline

# Load models once at import time. Both are large Hugging Face checkpoints,
# so the first run downloads them and startup is slow by design.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# PDF text extraction
|
|
|
|
|
|
|
| 10 |
def extract_text_from_pdf(pdf_file):
    """Extract plain text from every page of a PDF.

    Parameters
    ----------
    pdf_file : str or file-like
        Path or file object accepted by ``pdfplumber.open``.

    Returns
    -------
    str
        Concatenated page texts, each terminated by a newline. Pages
        with no extractable text (e.g. scanned images) are skipped,
        so the result may be ``""``.
    """
    chunks = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                chunks.append(content + "\n")
    # Single join instead of repeated += (avoids quadratic string rebuilds
    # on long documents); output is byte-identical to the old loop.
    return "".join(chunks)
|
| 18 |
|
| 19 |
+
# Split text into sections
|
| 20 |
def split_into_sections(text):
    """Split raw paper text into sections keyed by common heading names.

    A line containing any known heading word switches the active section
    to that heading; every line (heading lines included) is appended to
    the active section. Text before the first heading goes under
    ``"others"``.

    Parameters
    ----------
    text : str
        Full document text.

    Returns
    -------
    dict[str, str]
        Mapping of section name -> accumulated newline-terminated lines.
        Always contains at least ``{"others": ""}``.
    """
    headings = ["abstract", "introduction", "methodology", "methods", "results", "discussion", "conclusion", "references"]
    current = "others"
    sections = {current: ""}
    for line in text.splitlines():
        lowered = line.lower()
        # Single scan per line (the old code ran any() and then next()
        # over the same list).
        match = next((h for h in headings if h in lowered), None)
        if match is not None:
            current = match
            # Bug fix: setdefault instead of unconditional assignment.
            # Previously, any later line merely *containing* a heading
            # word (e.g. "more results here") reset that section to ""
            # and silently discarded everything collected so far.
            sections.setdefault(current, "")
        sections[current] += line + "\n"
    return sections
|
| 31 |
|
| 32 |
+
# LLM-based functions
|
| 33 |
+
def summarize(text):
    """Return a BART-generated summary of the first 3000 characters of *text*."""
    truncated = text[:3000]
    outputs = summarizer(truncated)
    return outputs[0]['summary_text']
|
| 34 |
+
def critique(text):
    """Ask the critic model for a critique of one section of text."""
    prompt = f"Critique this section:\n{text}"
    outputs = critic(prompt, max_length=512)
    return outputs[0]['generated_text']
|
| 35 |
+
def find_gaps(text):
    """Ask the critic model to identify research gaps in the given text."""
    prompt = f"What research gaps are present?\n{text}"
    outputs = critic(prompt, max_length=512)
    return outputs[0]['generated_text']
|
| 36 |
+
def custom_query(text):
    """Run an arbitrary user prompt straight through the critic model."""
    outputs = critic(text, max_length=512)
    return outputs[0]['generated_text']
|
| 37 |
+
|
| 38 |
+
# Main handler
|
| 39 |
+
def analyze(file, pasted_text, custom_prompt):
    """Main Gradio handler: summarize and critique a paper.

    Parameters
    ----------
    file : gradio file object or None
        Uploaded PDF; its ``.name`` attribute is the temp-file path.
    pasted_text : str or None
        Alternative raw text input (used only when no file is given).
    custom_prompt : str or None
        Optional free-form question for the critic model.

    Returns
    -------
    tuple[str, str, str, str]
        (summary, per-section critique markdown, per-section research-gap
        markdown, custom-prompt response). On missing input, the first
        element is an error message and the rest are empty strings.
    """
    if file is not None:
        text = extract_text_from_pdf(file.name)
    elif pasted_text and pasted_text.strip():
        # Guard against None: a cleared Gradio textbox can deliver None,
        # and None.strip() would raise AttributeError.
        text = pasted_text
    else:
        return "Please upload a file or enter some text.", "", "", ""

    # Generate outputs
    summary = summarize(text)
    sections = split_into_sections(text)

    critique_parts = []
    gap_parts = []
    for sec, content in sections.items():
        snippet = content[:1000]  # cap per-section LLM input size
        critique_parts.append(f"### {sec.capitalize()}\n**Critique:**\n{critique(snippet)}\n\n")
        gap_parts.append(f"### {sec.capitalize()}\n**Research Gaps:**\n{find_gaps(snippet)}\n\n")
    critique_output = "".join(critique_parts)
    gap_output = "".join(gap_parts)

    custom_response = ""
    if custom_prompt and custom_prompt.strip():  # same None guard as above
        custom_response = custom_query(custom_prompt)

    return summary, critique_output, gap_output, custom_response
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
# Gradio UI: three inputs (file / pasted text / optional question) mapped
# onto four markdown panes by the analyze() handler.
input_widgets = [
    gr.File(label="Upload PDF", file_types=[".pdf"]),
    gr.Textbox(lines=8, placeholder="Or paste your research text here...", label="Pasted Text"),
    gr.Textbox(lines=2, placeholder="Optional: Ask a custom question", label="Custom Prompt"),
]
output_widgets = [
    gr.Markdown(label="π Summary"),
    gr.Markdown(label="π§ Critique by Section"),
    gr.Markdown(label="π Research Gaps"),
    gr.Markdown(label="π¬ Custom Prompt Response"),
]

demo = gr.Interface(
    fn=analyze,
    inputs=input_widgets,
    outputs=output_widgets,
    title="π Research Paper Critique Generator",
    description="Upload a research paper (PDF) or paste its content. Get section-wise critiques, summaries, and gap analysis using LLMs.",
    theme="default",
)

demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
|
|