File size: 2,803 Bytes
01bcd3d
60c8b88
d3b3dd1
60c8b88
 
d3b3dd1
01bcd3d
 
60c8b88
d3b3dd1
 
 
 
60c8b88
d3b3dd1
 
 
 
60c8b88
d3b3dd1
 
 
 
 
60c8b88
d3b3dd1
 
 
01bcd3d
d3b3dd1
01bcd3d
d3b3dd1
 
 
60c8b88
d3b3dd1
 
60c8b88
01bcd3d
 
60c8b88
01bcd3d
d3b3dd1
 
 
 
 
60c8b88
d3b3dd1
01bcd3d
d3b3dd1
60c8b88
d3b3dd1
60c8b88
d3b3dd1
01bcd3d
 
 
d3b3dd1
 
 
01bcd3d
 
 
d3b3dd1
01bcd3d
d3b3dd1
01bcd3d
d3b3dd1
 
01bcd3d
 
60c8b88
d3b3dd1
 
60c8b88
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
import pdfplumber
import re
from transformers import pipeline

# Load models once at import time so every request reuses them.
# NOTE(review): both calls download weights on first run (several GB total)
# and block module import until loading finishes.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")

# Extract and clean PDF text
def extract_text(file):
    """Return the text of every page in *file* (a PDF path), newline-joined.

    Pages with no extractable text contribute an empty string instead of None.
    """
    with pdfplumber.open(file) as pdf:
        pages = (page.extract_text() or "" for page in pdf.pages)
        return "\n".join(pages)

# Split text into sections based on common headings
def extract_sections(text):
    """Split *text* into ``{lowercased header: body}`` by heading-like lines.

    A heading is a newline-delimited line of 4-41 characters that starts
    with a capital letter and contains only letters and spaces. Each body
    runs from the end of its heading to the start of the next one (or to
    the end of the text). Falls back to ``{"full text": text}`` when no
    heading is found.
    """
    heading_re = re.compile(r'\n(?P<header>[A-Z][A-Za-z ]{3,40})\n')
    hits = list(heading_re.finditer(text))
    result = {}
    for idx, hit in enumerate(hits):
        body_start = hit.end()
        if idx + 1 < len(hits):
            body_end = hits[idx + 1].start()
        else:
            body_end = len(text)
        key = hit.group("header").strip().lower()
        result[key] = text[body_start:body_end].strip()
    if not result:
        return {"full text": text}
    return result

# Limit content for faster inference
def truncate(text, max_tokens=1000):
    return text[:max_tokens * 5]  # approx. chars per token

# Run analysis
def analyze(file, pasted_text, custom_prompt):
    """Produce a summary, per-section critique, research-gap list, and an
    optional custom-prompt answer for an uploaded PDF or pasted text.

    Returns a 4-tuple of markdown strings; all four are informative even
    when some inputs are missing ("No text provided." sentinel when there
    is no text at all).
    """
    # Prefer the uploaded file; otherwise fall back to pasted text.
    if file:
        raw_text = extract_text(file.name)
    else:
        raw_text = pasted_text.strip()
    if not raw_text:
        return "No text provided.", "", "", ""

    summary = summarizer(
        truncate(raw_text), max_length=250, min_length=100, do_sample=False
    )[0]['summary_text']

    critiques = []
    gaps = []
    for name, body in extract_sections(raw_text).items():
        snippet = truncate(body)
        critique = critic(f"Critique this section:\n{snippet}", max_length=256)[0]['generated_text']
        gap = critic(f"Identify research gaps in this section:\n{snippet}", max_length=256)[0]['generated_text']
        critiques.append(f"### {name.title()}\n{critique}\n\n")
        gaps.append(f"### {name.title()}\n{gap}\n\n")

    custom_out = ""
    prompt = custom_prompt.strip()
    if prompt:
        custom_out = critic(prompt, max_length=256)[0]['generated_text']

    return summary, "".join(critiques).strip(), "".join(gaps).strip(), custom_out

# Gradio interface: three inputs (PDF upload, pasted text, optional prompt)
# map positionally onto analyze(file, pasted_text, custom_prompt); the four
# Markdown outputs map onto its 4-tuple return value in order.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="πŸ“„ Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=10, label="✍️ Or paste paper content"),
        gr.Textbox(lines=2, label="πŸ’¬ Optional: Custom prompt")
    ],
    outputs=[
        gr.Markdown(label="πŸ“‹ Summary"),
        gr.Markdown(label="🧠 Critique"),
        gr.Markdown(label="πŸ”Ž Research Gaps"),
        gr.Markdown(label="πŸ’‘ Custom Prompt Response")
    ],
    title="Research Paper Critique Generator",
    description="Upload a PDF or paste text to receive an AI-generated summary, critique, and identification of research gaps.",
    theme="default"
)

# Start the local Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()