# Research Paper Critique Generator — Gradio app producing an AI-generated
# summary, per-section critique, and research-gap analysis for a paper.
import gradio as gr
import pdfplumber
import re
from transformers import pipeline
# Load both models once at import time so every request reuses the same
# pipelines instead of paying model-load cost per call.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
# Extract and clean PDF text
def extract_text(file):
    """Return the text of every page of a PDF, pages joined by newlines.

    Pages for which pdfplumber yields no text contribute an empty string,
    so the page count is preserved in the newline structure.
    """
    with pdfplumber.open(file) as pdf:
        page_texts = (page.extract_text() or "" for page in pdf.pages)
        return "\n".join(page_texts)
# Split text into sections based on common headings
def extract_sections(text):
    """Split *text* into sections keyed by lowercased heading.

    A heading is a standalone line of 4-41 letters/spaces starting with a
    capital letter. Text before the first heading is discarded. If no
    heading is found, the whole input is returned under "full text".
    """
    heading_re = re.compile(r'\n(?P<header>[A-Z][A-Za-z ]{3,40})\n')
    hits = list(heading_re.finditer(text))
    if not hits:
        return {"full text": text}
    result = {}
    for idx, hit in enumerate(hits):
        body_start = hit.end()
        # Body runs until the next heading, or to the end of the text.
        body_end = hits[idx + 1].start() if idx + 1 < len(hits) else len(text)
        result[hit.group("header").strip().lower()] = text[body_start:body_end].strip()
    return result
# Limit content for faster inference
def truncate(text, max_tokens=1000, chars_per_token=5):
    """Trim *text* to roughly *max_tokens* tokens for faster inference.

    Uses a crude character-count heuristic (default 5 chars per token)
    rather than a real tokenizer; the multiplier is now a parameter so
    callers can tune the estimate without changing behavior of existing
    call sites (default matches the original hard-coded value).
    """
    return text[:max_tokens * chars_per_token]
# Run analysis
def analyze(file, pasted_text, custom_prompt):
raw_text = extract_text(file.name) if file else pasted_text.strip()
if not raw_text:
return "No text provided.", "", "", ""
summary = summarizer(truncate(raw_text), max_length=250, min_length=100, do_sample=False)[0]['summary_text']
sections = extract_sections(raw_text)
critique_output = ""
gap_output = ""
for sec, content in sections.items():
snippet = truncate(content)
critique_out = critic(f"Critique this section:\n{snippet}", max_length=256)[0]['generated_text']
gaps_out = critic(f"Identify research gaps in this section:\n{snippet}", max_length=256)[0]['generated_text']
critique_output += f"### {sec.title()}\n{critique_out}\n\n"
gap_output += f"### {sec.title()}\n{gaps_out}\n\n"
custom_out = ""
if custom_prompt.strip():
custom_out = critic(custom_prompt.strip(), max_length=256)[0]['generated_text']
return summary, critique_output.strip(), gap_output.strip(), custom_out
# Gradio interface
demo = gr.Interface(
fn=analyze,
inputs=[
gr.File(label="π Upload PDF", file_types=[".pdf"]),
gr.Textbox(lines=10, label="βοΈ Or paste paper content"),
gr.Textbox(lines=2, label="π¬ Optional: Custom prompt")
],
outputs=[
gr.Markdown(label="π Summary"),
gr.Markdown(label="π§ Critique"),
gr.Markdown(label="π Research Gaps"),
gr.Markdown(label="π‘ Custom Prompt Response")
],
title="Research Paper Critique Generator",
description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
theme="default"
)
if __name__ == "__main__":
demo.launch()