tejovanth committed on
Commit
01bcd3d
·
verified ·
1 Parent(s): 112f3a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -53
app.py CHANGED
@@ -1,77 +1,85 @@
1
- import streamlit as st
2
  import pdfplumber
3
- import re
4
  from transformers import pipeline
5
 
6
- # Initialize LLMs
7
- @st.cache_resource
8
- def load_models():
9
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
- critic = pipeline("text2text-generation", model="google/flan-t5-large")
11
- return summarizer, critic
12
 
13
- summarizer, critic = load_models()
14
-
15
- # PDF Text Extraction
16
  def extract_text_from_pdf(pdf_file):
17
  text = ""
18
  with pdfplumber.open(pdf_file) as pdf:
19
  for page in pdf.pages:
20
- page_text = page.extract_text()
21
- if page_text:
22
- text += page_text + "\n"
23
  return text
24
 
25
- # Sectioning Logic
26
  def split_into_sections(text):
27
  sections = {}
28
  headings = ["abstract", "introduction", "methodology", "methods", "results", "discussion", "conclusion", "references"]
29
- current_section = "others"
30
- sections[current_section] = ""
31
-
32
  for line in text.splitlines():
33
- line_lower = line.lower().strip()
34
- if any(h in line_lower for h in headings):
35
- current_section = next((h for h in headings if h in line_lower), "others")
36
- sections[current_section] = ""
37
- sections[current_section] += line + "\n"
38
-
39
  return sections
40
 
41
- # NLP Functions
42
- def summarize_section(text, max_len=300):
43
- return summarizer(text, max_length=max_len, min_length=100, do_sample=False)[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- def critique_section(text):
46
- prompt = f"Critically evaluate this section:\n{text}\nList strengths, weaknesses, and improvements."
47
- return critic(prompt, max_length=512, do_sample=False)[0]['generated_text']
48
 
49
- def identify_research_gaps(text):
50
- prompt = f"Identify research gaps or unanswered questions in the following study:\n{text}"
51
- return critic(prompt, max_length=512, do_sample=False)[0]['generated_text']
52
 
53
- # Streamlit UI
54
- st.set_page_config(page_title="CritiqueGen", layout="wide")
55
- st.title("πŸ“„ Research Paper Critique Generator")
 
56
 
57
- uploaded_file = st.file_uploader("Upload a research paper (PDF)", type="pdf")
 
 
58
 
59
- if uploaded_file:
60
- with st.spinner("Reading and analyzing the paper..."):
61
- full_text = extract_text_from_pdf(uploaded_file)
62
- sections = split_into_sections(full_text)
63
 
64
- st.header("πŸ“‹ Summary")
65
- summary = summarize_section(full_text[:3000])
66
- st.write(summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- st.header("πŸ” Section-wise Critique and Suggestions")
69
- for sec, content in sections.items():
70
- if content.strip():
71
- with st.expander(f"πŸ“˜ {sec.capitalize()}"):
72
- st.subheader("Critique")
73
- st.write(critique_section(content[:1000])) # Limit input size
74
- st.subheader("Research Gaps")
75
- st.write(identify_research_gaps(content[:1000]))
76
 
77
- st.success("Analysis complete βœ…")
 
1
+ import gradio as gr
2
  import pdfplumber
 
3
  from transformers import pipeline
4
 
5
# Load models
# Both pipelines are built once, at import time, and shared by every request;
# the helper functions below read these module-level names.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
critic = pipeline("text2text-generation", model="google/flan-t5-large")
 
 
 
8
 
9
+ # PDF text extraction
 
 
10
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts, e.g. a file path.

    Returns:
        The page texts concatenated in page order, each followed by a
        newline. Pages for which ``extract_text()`` returns nothing are
        skipped, so the result is ``""`` when no page yields text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # Build the result in one pass instead of growing a string with +=.
        return "".join(content + "\n" for content in page_texts if content)
18
 
19
+ # Split text into sections
20
def split_into_sections(text):
    """Split paper text into sections keyed by a known heading name.

    A line starts a new section only when it *looks like* a heading: it is
    short, does not start with a lowercase letter, does not end like a
    sentence, and contains one of the known heading words as a whole word
    (a plural "s" and surrounding numbering/punctuation are tolerated, so
    "1. Introduction" and "Conclusions" both match). Ordinary body lines that
    merely mention "results" or "methods" stay in the current section.

    Args:
        text: The full text to split.

    Returns:
        A dict mapping section name to that section's lines (heading line
        included, each line newline-terminated). Text before the first
        recognised heading is stored under ``"others"``, which is always
        present. If a heading occurs more than once, its text is appended to
        the existing section rather than replacing it.
    """
    headings = ["abstract", "introduction", "methodology", "methods", "results", "discussion", "conclusion", "references"]
    max_heading_words = 6  # heuristic: real headings are short

    current = "others"
    sections = {current: ""}

    for line in text.splitlines():
        stripped = line.strip()
        # Drop section numbering and punctuation so "1. Introduction:" yields "introduction".
        words = [w.strip("0123456789.,:;()[]") for w in stripped.lower().split()]
        looks_like_heading = (
            0 < len(words) <= max_heading_words
            and not stripped[0].islower()
            and not stripped.endswith((".", ",", ";"))
        )
        if looks_like_heading:
            # First match in list order wins, as in the heading list above.
            matched = next((h for h in headings if h in words or h + "s" in words), None)
            if matched is not None:
                current = matched
                # setdefault keeps text already collected under a repeated heading.
                sections.setdefault(current, "")
        sections[current] += line + "\n"

    return sections
31
 
32
+ # LLM-based functions
33
def _generate(prompt):
    """Run *prompt* through the ``critic`` pipeline and return the generated text."""
    # truncation=True asks the tokenizer to clip prompts that exceed the
    # model's input window instead of passing them through at full length.
    return critic(prompt, max_length=512, truncation=True)[0]['generated_text']


def summarize(text):
    """Summarize the first 3000 characters of *text* with the ``summarizer`` pipeline."""
    # The character cap alone does not bound the token count, so also request
    # tokenizer-level truncation.
    return summarizer(text[:3000], truncation=True)[0]['summary_text']


def critique(text):
    """Return the model's critique of one section of text."""
    return _generate(f"Critique this section:\n{text}")


def find_gaps(text):
    """Return the model's answer on which research gaps *text* leaves open."""
    return _generate(f"What research gaps are present?\n{text}")


def custom_query(text):
    """Send *text* to the model verbatim and return its response."""
    return _generate(text)
37
+
38
+ # Main handler
39
def analyze(file, pasted_text, custom_prompt):
    """Run the summary / critique / gap analysis for one request.

    An uploaded file takes precedence over pasted text.

    Args:
        file: Value of the file input, or ``None``. Either a path string or
            an object exposing the path as ``.name``.
        pasted_text: Text typed into the "Pasted Text" box; ``None`` is
            treated as empty.
        custom_prompt: Optional free-form question; ``None`` is treated as
            empty.

    Returns:
        A 4-tuple of strings ``(summary, critique_markdown, gaps_markdown,
        custom_response)``. When there is nothing to analyze, the first
        element carries a message for the user and the rest are empty.
    """
    pasted_text = pasted_text or ""
    custom_prompt = custom_prompt or ""

    if file is not None:
        # NOTE(review): depending on the Gradio version/config the upload is a
        # path string or a temp-file wrapper with the path in .name; accept both.
        text = extract_text_from_pdf(getattr(file, "name", file))
    elif pasted_text.strip():
        text = pasted_text
    else:
        return "Please upload a file or enter some text.", "", "", ""

    if not text.strip():
        # extract_text_from_pdf returned no text; do not feed the models an empty string.
        return "No extractable text was found in the uploaded file.", "", "", ""

    # Generate outputs
    summary = summarize(text)

    critique_parts = []
    gap_parts = []
    for sec, content in split_into_sections(text).items():
        if not content.strip():
            # e.g. the always-present "others" bucket when the text opens with a heading
            continue
        snippet = content[:1000]  # limit the prompt size per section
        critique_parts.append(f"### {sec.capitalize()}\n**Critique:**\n{critique(snippet)}\n\n")
        gap_parts.append(f"### {sec.capitalize()}\n**Research Gaps:**\n{find_gaps(snippet)}\n\n")

    # NOTE(review): the custom prompt is sent on its own, without the paper text.
    custom_response = custom_query(custom_prompt) if custom_prompt.strip() else ""

    return summary, "".join(critique_parts), "".join(gap_parts), custom_response
 
 
 
64
 
65
+ # Gradio UI
66
# Three inputs feed analyze(); the four Markdown outputs are listed in the
# same order as the 4-tuple analyze() returns.
demo = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Textbox(lines=8, placeholder="Or paste your research text here...", label="Pasted Text"),
        gr.Textbox(lines=2, placeholder="Optional: Ask a custom question", label="Custom Prompt"),
    ],
    outputs=[
        gr.Markdown(label="📋 Summary"),
        gr.Markdown(label="🧠 Critique by Section"),
        gr.Markdown(label="🔎 Research Gaps"),
        gr.Markdown(label="💬 Custom Prompt Response"),
    ],
    title="📄 Research Paper Critique Generator",
    description="Upload a research paper (PDF) or paste its content. Get section-wise critiques, summaries, and gap analysis using LLMs.",
    theme="default",
)

demo.launch()
 
 
 
 
 
 
 
85