Azidan committed on
Commit
542de0c
·
verified ·
1 Parent(s): ae8ae80

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+ import pdfplumber
5
+ from docx import Document
6
+ import io
7
+
8
# Build the summarization pipeline a single time at import so that every
# request handled by the app reuses the same loaded model.
if torch.cuda.is_available():
    device = 0   # reported below as "GPU"
else:
    device = -1  # reported below as "CPU"
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

summarizer = pipeline(
    "summarization",
    # Checkpoint picked for being fast and good on CPU; switch to
    # "facebook/bart-large-cnn" if a GPU becomes available.
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
17
+
18
def extract_text(file):
    """Return the plain text contained in an uploaded lecture file.

    Args:
        file: The value delivered by the file-upload input. Accepted forms:
            ``None`` (nothing uploaded), a filesystem path given as ``str``,
            or an object exposing ``.name`` and optionally ``.read()``.

    Returns:
        The extracted text as ``str``. ``""`` is returned when ``file`` is
        ``None``. This function does not raise for bad input: an unsupported
        extension or any read/parse failure is reported as a human-readable
        message string instead.
    """
    if file is None:
        return ""

    # The upload may arrive as a path string or as a tempfile-like object
    # (NOTE(review): which one depends on the installed Gradio version and the
    # File component's `type` setting -- confirm). Normalise both to a path.
    path = file if isinstance(file, str) else getattr(file, "name", "")
    filename = str(path).lower()

    # Reject unknown extensions before touching the file contents.
    if not filename.endswith((".pdf", ".docx", ".txt")):
        return "Unsupported file. Please use .pdf, .docx, or .txt"

    # Reading is inside the try so that I/O failures are reported to the user
    # the same way as parsing failures.
    try:
        if hasattr(file, "read"):
            content = file.read()
        else:
            with open(path, "rb") as handle:
                content = handle.read()
        if isinstance(content, str):
            # A handle opened in text mode yields str; the parsers below
            # expect bytes.
            content = content.encode("utf-8")

        if filename.endswith(".pdf"):
            with pdfplumber.open(io.BytesIO(content)) as pdf:
                # extract_text() may return None for a page; treat as empty.
                return "\n".join(page.extract_text() or "" for page in pdf.pages)
        if filename.endswith(".docx"):
            doc = Document(io.BytesIO(content))
            # Skip paragraphs that are empty or whitespace-only.
            return "\n".join(
                para.text for para in doc.paragraphs if para.text.strip()
            )
        # Remaining supported case: .txt. Undecodable bytes become U+FFFD
        # rather than aborting the whole upload.
        return content.decode("utf-8", errors="replace")
    except Exception as e:  # UI boundary: surface the problem as text.
        return f"Error reading file: {str(e)}"
36
+
37
# Inputs with fewer words than this are returned unchanged.
_SHORT_TEXT_WORDS = 100
# Words per summarization request. NOTE(review): sized on the assumption that
# the model's input window is 1024 tokens and that English text averages
# roughly 1.3 tokens per word -- confirm if the checkpoint is swapped.
_CHUNK_WORDS = 600
# Lower/upper limits applied to the generated length of one chunk summary.
# NOTE(review): the upper limit assumes a 1024-position decoder -- confirm.
_MIN_SUMMARY_TOKENS = 30
_MAX_SUMMARY_TOKENS = 1024


def _split_into_chunks(text, chunk_words=_CHUNK_WORDS,
                       min_tail_words=_SHORT_TEXT_WORDS):
    """Split *text* into consecutive word-based chunks of about *chunk_words*."""
    words = text.split()
    chunks = [
        words[start:start + chunk_words]
        for start in range(0, len(words), chunk_words)
    ]
    # A very short final chunk gives the model almost nothing to summarize,
    # so fold it into the previous chunk instead.
    if len(chunks) > 1 and len(chunks[-1]) < min_tail_words:
        tail = chunks.pop()
        chunks[-1].extend(tail)
    return [" ".join(chunk) for chunk in chunks]


def _length_bounds(word_count, detail_level):
    """Return ``(min_length, max_length)`` generation limits for one chunk."""
    target = max(1, int(word_count * detail_level))
    max_len = min(_MAX_SUMMARY_TOKENS,
                  max(_MIN_SUMMARY_TOKENS, int(target * 1.5)))
    # Always strictly below max_len so the two limits can never conflict.
    min_len = max(1, min(int(target * 0.65), max_len - 1))
    return min_len, max_len


def summarize(input_text, file, detail_level):
    """Summarize pasted text or an uploaded file.

    Args:
        input_text: Text pasted by the user; may be ``None`` or empty.
        file: Uploaded file value, or ``None``. When given it takes
            precedence over ``input_text`` and is read via ``extract_text``.
        detail_level: Target summary length as a fraction of the input
            length (the UI slider supplies 0.15 to 0.60).

    Returns:
        A ``str``: the summary; the input itself when it has fewer than 100
        words; a prompt message when there is no text; or an error message if
        summarization fails. Exceptions from the model are not propagated.
    """
    if file is not None:
        text = extract_text(file)
    else:
        text = input_text
    # Strip in both branches so whitespace-only extraction (e.g. a PDF with
    # no text layer) is treated as "no text", and tolerate a None textbox.
    text = (text or "").strip()

    if not text:
        return "Please paste text or upload a valid lecture file."

    if len(text.split()) < _SHORT_TEXT_WORDS:
        return text  # Too short -> return as-is

    try:
        partial_summaries = []
        # Summarize chunk by chunk so the whole text is covered rather than
        # only the part that survives truncation to the input window.
        for chunk in _split_into_chunks(text):
            min_len, max_len = _length_bounds(len(chunk.split()), detail_level)
            result = summarizer(
                chunk,
                max_length=max_len,
                min_length=min_len,
                length_penalty=1.8,
                num_beams=4,
                early_stopping=True,
                do_sample=False,
                truncation=True,  # safety net if a chunk is still too long
            )
            partial_summaries.append(result[0]['summary_text'].strip())
        return "\n\n".join(partial_summaries)
    except Exception as e:  # UI boundary: report instead of crashing the app.
        return f"Error during summarization: {str(e)}\n(Try shorter text or lower detail level)"
70
+
71
def _existing_examples(rows):
    """Keep only example rows whose file entry (index 1), if any, exists."""
    import os  # function-scope import keeps this block self-contained

    return [row for row in rows if row[1] is None or os.path.exists(row[1])]


# Create Gradio interface: pasted text, an optional upload and a detail
# slider go in; the generated summary comes out.
interface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Textbox(
            lines=12,
            placeholder="Paste your lecture text here (or use the upload below)...",
            label="Lecture Text (Paste)",
        ),
        gr.File(
            file_types=[".pdf", ".docx", ".txt"],
            label="Upload Lecture File",
        ),
        gr.Slider(
            minimum=0.15,
            maximum=0.60,
            value=0.32,
            step=0.01,
            label="Detail Level (higher = longer, more detailed summary)",
        ),
    ],
    outputs=gr.Textbox(label="Generated Summary"),
    title="Lecture Summarizer",
    description="Upload a lecture file (PDF/DOCX/TXT) or paste text. Adjust the slider for shorter or more detailed summaries.",
    theme="soft",
    allow_flagging="never",
    # The PDF example is only offered once the sample file has actually been
    # added next to the app; a row pointing at a missing file is dropped.
    # NOTE(review): a missing example file is expected to break example
    # loading in Gradio -- confirm against the installed version.
    examples=_existing_examples([
        ["Sample lecture: Photosynthesis is the process by which plants convert light energy into chemical energy...", None, 0.40],
        [None, "example_lecture.pdf", 0.30],
    ]),
)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse the functions above) does not launch a web server.
if __name__ == "__main__":
    interface.launch()