Sazzz02 committed on
Commit
0c1c5bf
Β·
verified Β·
1 Parent(s): f006e21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -136
app.py CHANGED
@@ -1,7 +1,5 @@
1
  # Research Paper Summarizer using LangChain and Gradio
2
- # Updated for Hugging Face Spaces with beautiful Gradio UI
3
- # Install required packages first:
4
- # pip install gradio langchain openai pypdf2 reportlab python-dotenv transformers torch
5
 
6
  import gradio as gr
7
  import os
@@ -11,7 +9,6 @@ from io import BytesIO
11
  from reportlab.lib.pagesizes import letter
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
14
- from reportlab.lib.units import inch
15
  import tempfile
16
 
17
  # LangChain imports
@@ -20,152 +17,157 @@ from langchain.chains.summarize import load_summarize_chain
20
  from langchain.docstore.document import Document
21
  from langchain.llms import OpenAI
22
  from langchain.chat_models import ChatOpenAI
23
-
24
- # Alternative: Use Hugging Face models
25
  from langchain.llms import HuggingFacePipeline
26
- from transformers import pipeline
 
27
 
28
  load_dotenv()
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class ResearchPaperSummarizer:
31
  def __init__(self):
32
  self.llm = None
33
  self.model_info = ""
34
-
 
 
 
35
  def setup_llm(self, model_choice):
36
  """Setup LLM based on user choice"""
37
- # Check available API keys from environment
38
  openai_api_key = os.getenv("OPENAI_API_KEY")
39
  hf_token = os.getenv("HUGGINGFACE_TOKEN")
40
-
41
  try:
42
  if "OpenAI" in model_choice:
43
  if not openai_api_key:
44
  return False, "❌ OpenAI API Key not found in environment variables. Please add OPENAI_API_KEY to your Hugging Face Space settings."
45
-
46
  os.environ["OPENAI_API_KEY"] = openai_api_key
47
  if "GPT-4" in model_choice:
48
  self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.3)
49
- self.model_info = "πŸš€ Using GPT-4 (Premium) - Highest quality summaries"
50
  else:
51
  self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
52
- self.model_info = "πŸš€ Using GPT-3.5 Turbo - Fast and efficient"
53
-
54
  else:
55
- # Use Hugging Face models (free but slower)
56
  if "BART" in model_choice:
57
- summarizer = pipeline(
58
- "summarization",
59
- model="facebook/bart-large-cnn",
60
- tokenizer="facebook/bart-large-cnn",
61
- use_auth_token=hf_token if hf_token else None
62
- )
63
- self.model_info = "πŸ€— Using BART model - Free, optimized for summarization"
64
  else:
65
- summarizer = pipeline(
66
- "summarization",
67
- model="t5-base",
68
- tokenizer="t5-base",
69
- use_auth_token=hf_token if hf_token else None
70
- )
71
- self.model_info = "πŸ€— Using T5 model - Free, versatile model"
72
-
73
- self.llm = HuggingFacePipeline(pipeline=summarizer)
74
-
75
  return True, f"βœ… Model loaded successfully! {self.model_info}"
76
-
77
  except Exception as e:
78
  return False, f"❌ Error loading model: {str(e)}"
79
-
80
  def extract_text_from_pdf(self, pdf_file):
81
  """Extract text from uploaded PDF"""
82
  try:
83
  if pdf_file is None:
84
  return None, "❌ No PDF file uploaded"
85
-
86
  pdf_reader = PyPDF2.PdfReader(pdf_file)
87
  text = ""
88
  for page in pdf_reader.pages:
89
- text += page.extract_text()
90
-
 
91
  if not text.strip():
92
  return None, "❌ No text could be extracted from the PDF"
93
-
94
  return text, f"βœ… Successfully extracted {len(text):,} characters from PDF"
95
  except Exception as e:
96
  return None, f"❌ Error reading PDF: {str(e)}"
97
-
98
  def create_documents(self, text):
99
- """Split text into manageable chunks"""
100
  text_splitter = RecursiveCharacterTextSplitter(
101
  chunk_size=4000,
102
  chunk_overlap=200,
103
  length_function=len
104
  )
105
-
106
  chunks = text_splitter.split_text(text)
107
  documents = [Document(page_content=chunk) for chunk in chunks]
108
  return documents
109
-
110
- def generate_summary(self, documents, summary_type="map_reduce"):
111
- """Generate summary using LangChain"""
112
  try:
113
- if summary_type == "map_reduce":
114
- chain = load_summarize_chain(
115
- self.llm,
116
- chain_type="map_reduce",
117
- verbose=False
118
  )
 
 
 
119
  elif summary_type == "stuff":
120
- chain = load_summarize_chain(
121
- self.llm,
122
- chain_type="stuff",
123
- verbose=False
124
- )
125
- else: # refine
126
- chain = load_summarize_chain(
127
- self.llm,
128
- chain_type="refine",
129
- verbose=False
130
- )
131
-
132
  summary = chain.run(documents)
133
  return summary
134
-
135
  except Exception as e:
136
  return f"❌ Error generating summary: {str(e)}"
137
-
138
  def create_structured_summary(self, text, documents):
139
  """Create a structured summary with different sections"""
140
  summaries = {}
141
-
142
  # Overall Summary
143
- summaries['overall'] = self.generate_summary(documents, "map_reduce")
144
-
145
- # Key Points - Create a separate prompt for key points
146
- if len(text) > 8000:
147
- key_points_text = text[:8000]
148
- else:
149
- key_points_text = text
150
-
151
  key_points_prompt = f"""
152
  Extract the 5-7 most important key points from this research paper:
153
-
154
  {key_points_text}
155
  """
156
-
157
  key_points_docs = [Document(page_content=key_points_prompt)]
158
- summaries['key_points'] = self.generate_summary(key_points_docs, "stuff")
159
-
160
  return summaries
161
-
162
  def create_pdf_summary(self, summaries, paper_title="Research Paper Summary"):
163
  """Create PDF with the summary"""
164
  buffer = BytesIO()
165
  doc = SimpleDocTemplate(buffer, pagesize=letter)
166
  styles = getSampleStyleSheet()
167
  story = []
168
-
169
  # Title
170
  title_style = ParagraphStyle(
171
  'CustomTitle',
@@ -174,30 +176,23 @@ class ResearchPaperSummarizer:
174
  spaceAfter=30,
175
  textColor='darkblue'
176
  )
177
-
178
  story.append(Paragraph(paper_title, title_style))
179
  story.append(Spacer(1, 12))
180
-
181
  # Overall Summary
182
  story.append(Paragraph("Overall Summary", styles['Heading2']))
183
  story.append(Spacer(1, 12))
184
  story.append(Paragraph(summaries.get('overall', 'No summary available'), styles['Normal']))
185
  story.append(Spacer(1, 20))
186
-
187
  # Key Points
188
  if 'key_points' in summaries:
189
  story.append(Paragraph("Key Points", styles['Heading2']))
190
  story.append(Spacer(1, 12))
191
  story.append(Paragraph(summaries['key_points'], styles['Normal']))
192
-
193
  doc.build(story)
194
  buffer.seek(0)
195
-
196
- # Save to temporary file for Gradio
197
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
198
  temp_file.write(buffer.getvalue())
199
  temp_file.close()
200
-
201
  return temp_file.name
202
 
203
  # Initialize the summarizer
@@ -205,40 +200,31 @@ summarizer = ResearchPaperSummarizer()
205
 
206
  def process_paper(pdf_file, model_choice, summary_type, include_key_points, paper_title):
207
  """Main function to process the research paper"""
208
-
209
  # Setup model
210
  success, message = summarizer.setup_llm(model_choice)
211
  if not success:
212
  return message, "", "", None
213
-
214
  status_message = message + "\n\n"
215
-
216
  # Extract text from PDF
217
  text, extract_message = summarizer.extract_text_from_pdf(pdf_file)
218
  status_message += extract_message + "\n\n"
219
-
220
  if text is None:
221
  return status_message, "", "", None
222
-
223
  # Create documents
224
  documents = summarizer.create_documents(text)
225
  status_message += f"πŸ“ Text split into {len(documents)} chunks for processing\n\n"
226
-
227
  # Generate summary
228
  status_message += "πŸ”„ Generating summary... Please wait...\n\n"
229
-
230
  try:
231
  if include_key_points:
232
  summaries = summarizer.create_structured_summary(text, documents)
233
  overall_summary = summaries.get('overall', 'No summary generated')
234
  key_points = summaries.get('key_points', 'No key points generated')
235
  else:
236
- overall_summary = summarizer.generate_summary(documents, summary_type)
237
  key_points = "Key points not requested"
238
  summaries = {'overall': overall_summary}
239
-
240
  status_message += "πŸŽ‰ Summary generated successfully!"
241
-
242
  # Generate PDF if title is provided
243
  pdf_file_path = None
244
  if paper_title and paper_title.strip():
@@ -247,9 +233,7 @@ def process_paper(pdf_file, model_choice, summary_type, include_key_points, pape
247
  status_message += "\nπŸ“„ PDF summary created!"
248
  except Exception as e:
249
  status_message += f"\n⚠️ PDF creation failed: {str(e)}"
250
-
251
  return status_message, overall_summary, key_points, pdf_file_path
252
-
253
  except Exception as e:
254
  return status_message + f"❌ Error during processing: {str(e)}", "", "", None
255
 
@@ -269,20 +253,17 @@ custom_css = """
269
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
270
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
271
  }
272
-
273
  .gr-interface {
274
  background: rgba(255, 255, 255, 0.95);
275
  backdrop-filter: blur(10px);
276
  border-radius: 20px;
277
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
278
  }
279
-
280
  .gr-box {
281
  border-radius: 15px;
282
  border: 2px solid #e1e5e9;
283
  background: linear-gradient(145deg, #ffffff, #f0f2f5);
284
  }
285
-
286
  .gr-button {
287
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
288
  border: none;
@@ -291,17 +272,14 @@ custom_css = """
291
  font-weight: bold;
292
  transition: transform 0.2s;
293
  }
294
-
295
  .gr-button:hover {
296
  transform: translateY(-2px);
297
  box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
298
  }
299
-
300
  .gr-textbox, .gr-dropdown {
301
  border-radius: 10px;
302
  border: 2px solid #e1e5e9;
303
  }
304
-
305
  .gr-file {
306
  border-radius: 15px;
307
  border: 3px dashed #667eea;
@@ -311,8 +289,6 @@ custom_css = """
311
 
312
  # Create the Gradio interface
313
  with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.themes.Soft()) as app:
314
-
315
- # Header
316
  gr.Markdown(
317
  """
318
  # πŸ”¬ Research Paper Summarizer
@@ -322,22 +298,18 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
322
  """,
323
  elem_classes="header"
324
  )
325
-
326
  with gr.Row():
327
  with gr.Column(scale=1):
328
- # Input Section
329
  gr.Markdown("## πŸ“ Upload & Configure")
330
-
331
  pdf_input = gr.File(
332
  label="πŸ“„ Upload Research Paper (PDF)",
333
  file_types=[".pdf"],
334
  elem_classes="file-upload"
335
  )
336
-
337
  model_choice = gr.Dropdown(
338
  choices=[
339
  "OpenAI GPT-3.5",
340
- "OpenAI GPT-4",
341
  "Hugging Face BART",
342
  "Hugging Face T5"
343
  ],
@@ -345,39 +317,31 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
345
  label="πŸ€– Choose AI Model",
346
  info="Free models work without API keys"
347
  )
348
-
349
  model_info = gr.Markdown("")
350
-
351
  summary_type = gr.Dropdown(
352
  choices=["map_reduce", "stuff", "refine"],
353
  value="map_reduce",
354
  label="πŸ“‹ Summary Method",
355
  info="map_reduce: best for long papers | stuff: faster for short papers | refine: iterative improvement"
356
  )
357
-
358
  include_key_points = gr.Checkbox(
359
  label="πŸ”‘ Include Key Points",
360
  value=True,
361
  info="Extract important key points separately"
362
  )
363
-
364
  paper_title = gr.Textbox(
365
  label="πŸ“ Paper Title (for PDF export)",
366
  placeholder="Enter the title of your research paper...",
367
  info="Optional: Used as title in the generated PDF summary"
368
  )
369
-
370
  process_btn = gr.Button(
371
  "πŸš€ Generate Summary",
372
  variant="primary",
373
  size="lg",
374
  elem_classes="process-button"
375
  )
376
-
377
  with gr.Column(scale=2):
378
- # Output Section
379
  gr.Markdown("## πŸ“Š Results")
380
-
381
  status_output = gr.Textbox(
382
  label="πŸ“ˆ Processing Status",
383
  lines=8,
@@ -385,7 +349,6 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
385
  interactive=False,
386
  show_copy_button=True
387
  )
388
-
389
  summary_output = gr.Textbox(
390
  label="πŸ“‹ Overall Summary",
391
  lines=10,
@@ -394,7 +357,6 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
394
  show_copy_button=True,
395
  placeholder="Your paper summary will appear here..."
396
  )
397
-
398
  key_points_output = gr.Textbox(
399
  label="πŸ”‘ Key Points",
400
  lines=8,
@@ -403,38 +365,31 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
403
  show_copy_button=True,
404
  placeholder="Key points will be extracted here..."
405
  )
406
-
407
  pdf_output = gr.File(
408
  label="πŸ“„ Download PDF Summary",
409
  interactive=False
410
  )
411
-
412
- # Setup Instructions
413
  with gr.Accordion("πŸ”§ Setup Instructions for API Keys", open=False):
414
  gr.Markdown(
415
  """
416
  ### For Enhanced Performance (Optional):
417
-
418
  **OpenAI API Setup:**
419
  1. Get your API key from [OpenAI Platform](https://platform.openai.com/api-keys)
420
  2. In your Hugging Face Space settings, add: `OPENAI_API_KEY = your_key_here`
421
  3. Restart your Space to apply changes
422
-
423
  **Hugging Face Token Setup:**
424
  1. Get your token from [HuggingFace Settings](https://huggingface.co/settings/tokens)
425
  2. Add: `HUGGINGFACE_TOKEN = your_token_here`
426
  3. Provides access to gated models and higher rate limits
427
-
428
  **Note:** Free Hugging Face models work without any API keys but may be slower on first load.
429
  """
430
  )
431
-
432
- # Performance Tips
433
  with gr.Accordion("πŸ’‘ Tips for Best Results", open=False):
434
  gr.Markdown(
435
  """
436
  ### Optimization Tips:
437
-
438
  - **πŸ“„ File Size:** Smaller PDFs (< 10MB) process faster
439
  - **πŸ€– Model Choice:** OpenAI models provide highest quality but require API keys
440
  - **⚑ Speed:** "stuff" method is fastest for papers under 20 pages
@@ -443,14 +398,11 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
443
  - **πŸ“± Mobile:** Works on mobile devices but desktop recommended for large files
444
  """
445
  )
446
-
447
- # Event handlers
448
  model_choice.change(
449
  fn=get_model_info,
450
  inputs=[model_choice],
451
  outputs=[model_info]
452
  )
453
-
454
  process_btn.click(
455
  fn=process_paper,
456
  inputs=[
@@ -468,8 +420,6 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
468
  ],
469
  show_progress=True
470
  )
471
-
472
- # Footer
473
  gr.Markdown(
474
  """
475
  ---
@@ -481,7 +431,6 @@ with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.
481
  elem_classes="footer"
482
  )
483
 
484
- # Launch the app
485
  if __name__ == "__main__":
486
  app.launch(
487
  share=True,
@@ -489,4 +438,5 @@ if __name__ == "__main__":
489
  debug=True,
490
  server_name="0.0.0.0",
491
  server_port=7860
492
- )
 
 
1
  # Research Paper Summarizer using LangChain and Gradio
2
+ # Hugging Face Spaces ready – robust chunking for large PDFs
 
 
3
 
4
  import gradio as gr
5
  import os
 
9
  from reportlab.lib.pagesizes import letter
10
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
11
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 
12
  import tempfile
13
 
14
  # LangChain imports
 
17
  from langchain.docstore.document import Document
18
  from langchain.llms import OpenAI
19
  from langchain.chat_models import ChatOpenAI
 
 
20
  from langchain.llms import HuggingFacePipeline
21
+
22
+ from transformers import pipeline, AutoTokenizer
23
 
24
  load_dotenv()
25
 
26
+ # --- Helper for robust chunking ---
27
+ def chunk_text_for_hf(text, tokenizer, max_tokens=1024, overlap=50):
28
+ """Split text into chunks compatible with Hugging Face summarizers."""
29
+ tokens = tokenizer.encode(text)
30
+ total_tokens = len(tokens)
31
+ if total_tokens <= max_tokens:
32
+ return [text]
33
+ chunks = []
34
+ start = 0
35
+ while start < total_tokens:
36
+ end = min(start + max_tokens, total_tokens)
37
+ chunk_tokens = tokens[start:end]
38
+ chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
39
+ chunks.append(chunk_text)
40
+ start += max_tokens - overlap
41
+ return chunks
42
+
43
def summarize_long_text_hf(text, summarizer, tokenizer, max_tokens=1024, overlap=50, max_length=150, min_length=40):
    """Summarize long text by chunking and combining summaries (Hugging Face models)."""

    def _summarize(piece):
        # The summarization pipeline returns a list with one dict per input.
        result = summarizer(
            piece,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']

    pieces = chunk_text_for_hf(text, tokenizer, max_tokens, overlap)
    return " ".join(_summarize(piece) for piece in pieces)
57
+
58
  class ResearchPaperSummarizer:
59
  def __init__(self):
60
  self.llm = None
61
  self.model_info = ""
62
+ self.hf_tokenizer = None
63
+ self.hf_summarizer = None
64
+ self.is_hf_pipeline = False
65
+
66
def setup_llm(self, model_choice):
    """Configure the summarization backend selected by *model_choice*.

    Returns a ``(success, message)`` tuple. On success ``self.llm`` and
    ``self.model_info`` are set; for Hugging Face choices the raw pipeline
    and tokenizer are also cached for direct chunked summarization.
    """
    openai_api_key = os.getenv("OPENAI_API_KEY")
    hf_token = os.getenv("HUGGINGFACE_TOKEN")
    self.is_hf_pipeline = False
    try:
        if "OpenAI" in model_choice:
            if not openai_api_key:
                return False, "❌ OpenAI API Key not found in environment variables. Please add OPENAI_API_KEY to your Hugging Face Space settings."
            os.environ["OPENAI_API_KEY"] = openai_api_key
            wants_gpt4 = "GPT-4" in model_choice
            self.llm = ChatOpenAI(
                model_name="gpt-4" if wants_gpt4 else "gpt-3.5-turbo",
                temperature=0.3,
            )
            self.model_info = (
                "🚀 Using GPT-4 (Premium)" if wants_gpt4 else "🚀 Using GPT-3.5 Turbo"
            )
        else:
            # Hugging Face path: load the pipeline plus a matching tokenizer
            # so long inputs can be chunked on token boundaries.
            self.is_hf_pipeline = True
            model_id = "facebook/bart-large-cnn" if "BART" in model_choice else "t5-base"
            auth = hf_token if hf_token else None
            self.hf_summarizer = pipeline(
                "summarization",
                model=model_id,
                tokenizer=model_id,
                use_auth_token=auth
            )
            self.hf_tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=auth)
            self.llm = HuggingFacePipeline(pipeline=self.hf_summarizer)
            self.model_info = f"🤗 Using {model_id} model"
        return True, f"✅ Model loaded successfully! {self.model_info}"
    except Exception as e:
        return False, f"❌ Error loading model: {str(e)}"
100
+
101
def extract_text_from_pdf(self, pdf_file):
    """Extract the full text of an uploaded PDF.

    Returns ``(text, status_message)``; ``text`` is ``None`` on any failure.
    """
    if pdf_file is None:
        return None, "❌ No PDF file uploaded"
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        # Pages with no extractable text yield falsy values; skip them.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return None, "❌ No text could be extracted from the PDF"
        return text, f"✅ Successfully extracted {len(text):,} characters from PDF"
    except Exception as e:
        return None, f"❌ Error reading PDF: {str(e)}"
117
+
118
def create_documents(self, text):
    """Wrap *text* into LangChain Document chunks (~4000 chars, 200 overlap)."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=4000,       # keep chunks well inside typical context limits
        chunk_overlap=200,     # overlap preserves sentence continuity
        length_function=len
    )
    return [Document(page_content=piece) for piece in splitter.split_text(text)]
128
+
129
def generate_summary(self, documents, summary_type="map_reduce", raw_text=None):
    """Summarize documents via LangChain, or chunked HF pipeline when active.

    Returns the summary string, or an error message string on failure.
    """
    try:
        # Hugging Face path: summarize the raw text directly with
        # token-aware chunking instead of a LangChain chain.
        hf_ready = (
            self.is_hf_pipeline and raw_text
            and self.hf_summarizer and self.hf_tokenizer
        )
        if hf_ready:
            return summarize_long_text_hf(
                raw_text, self.hf_summarizer, self.hf_tokenizer,
                max_tokens=1024, overlap=50, max_length=150, min_length=40
            )
        # LangChain path; any unrecognized summary type falls back to "refine".
        chain_type = summary_type if summary_type in ("map_reduce", "stuff") else "refine"
        chain = load_summarize_chain(self.llm, chain_type=chain_type, verbose=False)
        return chain.run(documents)
    except Exception as e:
        return f"❌ Error generating summary: {str(e)}"
149
+
150
def create_structured_summary(self, text, documents):
    """Build a dict with an 'overall' summary and extracted 'key_points'."""
    # Overall Summary over the full document set.
    overall = self.generate_summary(documents, "map_reduce", raw_text=text)
    # Key Points - Use first 8000 chars for key points
    key_points_text = text[:8000] if len(text) > 8000 else text
    key_points_prompt = f"""
    Extract the 5-7 most important key points from this research paper:
    {key_points_text}
    """
    key_points_docs = [Document(page_content=key_points_prompt)]
    return {
        'overall': overall,
        'key_points': self.generate_summary(key_points_docs, "stuff", raw_text=key_points_prompt),
    }
164
+
165
  def create_pdf_summary(self, summaries, paper_title="Research Paper Summary"):
166
  """Create PDF with the summary"""
167
  buffer = BytesIO()
168
  doc = SimpleDocTemplate(buffer, pagesize=letter)
169
  styles = getSampleStyleSheet()
170
  story = []
 
171
  # Title
172
  title_style = ParagraphStyle(
173
  'CustomTitle',
 
176
  spaceAfter=30,
177
  textColor='darkblue'
178
  )
 
179
  story.append(Paragraph(paper_title, title_style))
180
  story.append(Spacer(1, 12))
 
181
  # Overall Summary
182
  story.append(Paragraph("Overall Summary", styles['Heading2']))
183
  story.append(Spacer(1, 12))
184
  story.append(Paragraph(summaries.get('overall', 'No summary available'), styles['Normal']))
185
  story.append(Spacer(1, 20))
 
186
  # Key Points
187
  if 'key_points' in summaries:
188
  story.append(Paragraph("Key Points", styles['Heading2']))
189
  story.append(Spacer(1, 12))
190
  story.append(Paragraph(summaries['key_points'], styles['Normal']))
 
191
  doc.build(story)
192
  buffer.seek(0)
 
 
193
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
194
  temp_file.write(buffer.getvalue())
195
  temp_file.close()
 
196
  return temp_file.name
197
 
198
  # Initialize the summarizer
 
200
 
201
  def process_paper(pdf_file, model_choice, summary_type, include_key_points, paper_title):
202
  """Main function to process the research paper"""
 
203
  # Setup model
204
  success, message = summarizer.setup_llm(model_choice)
205
  if not success:
206
  return message, "", "", None
 
207
  status_message = message + "\n\n"
 
208
  # Extract text from PDF
209
  text, extract_message = summarizer.extract_text_from_pdf(pdf_file)
210
  status_message += extract_message + "\n\n"
 
211
  if text is None:
212
  return status_message, "", "", None
 
213
  # Create documents
214
  documents = summarizer.create_documents(text)
215
  status_message += f"πŸ“ Text split into {len(documents)} chunks for processing\n\n"
 
216
  # Generate summary
217
  status_message += "πŸ”„ Generating summary... Please wait...\n\n"
 
218
  try:
219
  if include_key_points:
220
  summaries = summarizer.create_structured_summary(text, documents)
221
  overall_summary = summaries.get('overall', 'No summary generated')
222
  key_points = summaries.get('key_points', 'No key points generated')
223
  else:
224
+ overall_summary = summarizer.generate_summary(documents, summary_type, raw_text=text)
225
  key_points = "Key points not requested"
226
  summaries = {'overall': overall_summary}
 
227
  status_message += "πŸŽ‰ Summary generated successfully!"
 
228
  # Generate PDF if title is provided
229
  pdf_file_path = None
230
  if paper_title and paper_title.strip():
 
233
  status_message += "\nπŸ“„ PDF summary created!"
234
  except Exception as e:
235
  status_message += f"\n⚠️ PDF creation failed: {str(e)}"
 
236
  return status_message, overall_summary, key_points, pdf_file_path
 
237
  except Exception as e:
238
  return status_message + f"❌ Error during processing: {str(e)}", "", "", None
239
 
 
253
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
254
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
255
  }
 
256
  .gr-interface {
257
  background: rgba(255, 255, 255, 0.95);
258
  backdrop-filter: blur(10px);
259
  border-radius: 20px;
260
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
261
  }
 
262
  .gr-box {
263
  border-radius: 15px;
264
  border: 2px solid #e1e5e9;
265
  background: linear-gradient(145deg, #ffffff, #f0f2f5);
266
  }
 
267
  .gr-button {
268
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
269
  border: none;
 
272
  font-weight: bold;
273
  transition: transform 0.2s;
274
  }
 
275
  .gr-button:hover {
276
  transform: translateY(-2px);
277
  box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
278
  }
 
279
  .gr-textbox, .gr-dropdown {
280
  border-radius: 10px;
281
  border: 2px solid #e1e5e9;
282
  }
 
283
  .gr-file {
284
  border-radius: 15px;
285
  border: 3px dashed #667eea;
 
289
 
290
  # Create the Gradio interface
291
  with gr.Blocks(css=custom_css, title="πŸ”¬ Research Paper Summarizer", theme=gr.themes.Soft()) as app:
 
 
292
  gr.Markdown(
293
  """
294
  # πŸ”¬ Research Paper Summarizer
 
298
  """,
299
  elem_classes="header"
300
  )
 
301
  with gr.Row():
302
  with gr.Column(scale=1):
 
303
  gr.Markdown("## πŸ“ Upload & Configure")
 
304
  pdf_input = gr.File(
305
  label="πŸ“„ Upload Research Paper (PDF)",
306
  file_types=[".pdf"],
307
  elem_classes="file-upload"
308
  )
 
309
  model_choice = gr.Dropdown(
310
  choices=[
311
  "OpenAI GPT-3.5",
312
+ "OpenAI GPT-4",
313
  "Hugging Face BART",
314
  "Hugging Face T5"
315
  ],
 
317
  label="πŸ€– Choose AI Model",
318
  info="Free models work without API keys"
319
  )
 
320
  model_info = gr.Markdown("")
 
321
  summary_type = gr.Dropdown(
322
  choices=["map_reduce", "stuff", "refine"],
323
  value="map_reduce",
324
  label="πŸ“‹ Summary Method",
325
  info="map_reduce: best for long papers | stuff: faster for short papers | refine: iterative improvement"
326
  )
 
327
  include_key_points = gr.Checkbox(
328
  label="πŸ”‘ Include Key Points",
329
  value=True,
330
  info="Extract important key points separately"
331
  )
 
332
  paper_title = gr.Textbox(
333
  label="πŸ“ Paper Title (for PDF export)",
334
  placeholder="Enter the title of your research paper...",
335
  info="Optional: Used as title in the generated PDF summary"
336
  )
 
337
  process_btn = gr.Button(
338
  "πŸš€ Generate Summary",
339
  variant="primary",
340
  size="lg",
341
  elem_classes="process-button"
342
  )
 
343
  with gr.Column(scale=2):
 
344
  gr.Markdown("## πŸ“Š Results")
 
345
  status_output = gr.Textbox(
346
  label="πŸ“ˆ Processing Status",
347
  lines=8,
 
349
  interactive=False,
350
  show_copy_button=True
351
  )
 
352
  summary_output = gr.Textbox(
353
  label="πŸ“‹ Overall Summary",
354
  lines=10,
 
357
  show_copy_button=True,
358
  placeholder="Your paper summary will appear here..."
359
  )
 
360
  key_points_output = gr.Textbox(
361
  label="πŸ”‘ Key Points",
362
  lines=8,
 
365
  show_copy_button=True,
366
  placeholder="Key points will be extracted here..."
367
  )
 
368
  pdf_output = gr.File(
369
  label="πŸ“„ Download PDF Summary",
370
  interactive=False
371
  )
 
 
372
  with gr.Accordion("πŸ”§ Setup Instructions for API Keys", open=False):
373
  gr.Markdown(
374
  """
375
  ### For Enhanced Performance (Optional):
 
376
  **OpenAI API Setup:**
377
  1. Get your API key from [OpenAI Platform](https://platform.openai.com/api-keys)
378
  2. In your Hugging Face Space settings, add: `OPENAI_API_KEY = your_key_here`
379
  3. Restart your Space to apply changes
380
+
381
  **Hugging Face Token Setup:**
382
  1. Get your token from [HuggingFace Settings](https://huggingface.co/settings/tokens)
383
  2. Add: `HUGGINGFACE_TOKEN = your_token_here`
384
  3. Provides access to gated models and higher rate limits
385
+
386
  **Note:** Free Hugging Face models work without any API keys but may be slower on first load.
387
  """
388
  )
 
 
389
  with gr.Accordion("πŸ’‘ Tips for Best Results", open=False):
390
  gr.Markdown(
391
  """
392
  ### Optimization Tips:
 
393
  - **πŸ“„ File Size:** Smaller PDFs (< 10MB) process faster
394
  - **πŸ€– Model Choice:** OpenAI models provide highest quality but require API keys
395
  - **⚑ Speed:** "stuff" method is fastest for papers under 20 pages
 
398
  - **πŸ“± Mobile:** Works on mobile devices but desktop recommended for large files
399
  """
400
  )
 
 
401
  model_choice.change(
402
  fn=get_model_info,
403
  inputs=[model_choice],
404
  outputs=[model_info]
405
  )
 
406
  process_btn.click(
407
  fn=process_paper,
408
  inputs=[
 
420
  ],
421
  show_progress=True
422
  )
 
 
423
  gr.Markdown(
424
  """
425
  ---
 
431
  elem_classes="footer"
432
  )
433
 
 
434
  if __name__ == "__main__":
435
  app.launch(
436
  share=True,
 
438
  debug=True,
439
  server_name="0.0.0.0",
440
  server_port=7860
441
+ )
442
+