the-carnage committed on
Commit
2a54e7f
·
1 Parent(s): b00aa55

Add text input and improved UI

Browse files
Files changed (1) hide show
  1. app.py +69 -23
app.py CHANGED
@@ -7,8 +7,9 @@ import tempfile
7
  import os
8
 
9
  # Page config
10
- st.set_page_config(page_title="PDF & Image Summarizer", layout="centered")
11
- st.title("📄🖼 Document Summarizer")
 
12
 
13
  # Load summarization model
14
  @st.cache_resource
@@ -17,12 +18,6 @@ def load_model():
17
 
18
  summarizer = load_model()
19
 
20
- # File uploader
21
- uploaded_file = st.file_uploader(
22
- "Upload PDF or Image",
23
- type=["pdf", "png", "jpg", "jpeg"]
24
- )
25
-
26
  def extract_text_from_image(image):
27
  return pytesseract.image_to_string(image)
28
 
@@ -35,31 +30,82 @@ def extract_text_from_pdf(file_path):
35
  text += page_text
36
  return text
37
 
38
- if uploaded_file:
39
- # Save temp file
40
- with tempfile.NamedTemporaryFile(delete=False) as tmp:
41
- tmp.write(uploaded_file.read())
42
- file_path = tmp.name
 
 
 
43
 
44
- ext = os.path.splitext(uploaded_file.name)[1].lower()
 
 
 
 
 
 
45
 
46
- if ext == ".pdf":
 
 
 
 
 
 
 
 
 
 
47
  text = extract_text_from_pdf(file_path)
48
- else:
49
- image = Image.open(file_path)
50
- text = extract_text_from_image(image)
51
 
52
- st.subheader("📃 Extracted Text (Preview)")
53
- st.text_area("", text[:3000], height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- if st.button("Summarize"):
 
 
56
  with st.spinner("Summarizing..."):
 
 
 
57
  summary = summarizer(
58
- text,
59
- max_length=120,
60
  min_length=40,
61
  do_sample=False
62
  )[0]["summary_text"]
63
 
64
  st.subheader("📝 Summary")
65
  st.success(summary)
 
 
 
 
 
 
 
 
 
 
 
7
  import os
8
 
9
  # Page config
10
+ st.set_page_config(page_title="Docurizzer - Document Summarizer", layout="centered")
11
+ st.title("📄🖼 Docurizzer")
12
+ st.markdown("*Extract and summarize text from documents, images, or paste your own text*")
13
 
14
  # Load summarization model
15
  @st.cache_resource
 
18
 
19
  summarizer = load_model()
20
 
 
 
 
 
 
 
21
  def extract_text_from_image(image):
22
  return pytesseract.image_to_string(image)
23
 
 
30
  text += page_text
31
  return text
32
 
33
+ # Input method selection
34
+ input_method = st.radio(
35
+ "Choose input method:",
36
+ ["πŸ“ Paste Text", "πŸ“„ Upload PDF", "πŸ–Ό Upload Image"],
37
+ horizontal=True
38
+ )
39
+
40
+ text = ""
41
 
42
+ if input_method == "πŸ“ Paste Text":
43
+ st.subheader("📝 Enter Your Text")
44
+ text = st.text_area(
45
+ "Paste or type your text here:",
46
+ height=250,
47
+ placeholder="Enter the text you want to summarize..."
48
+ )
49
 
50
+ elif input_method == "πŸ“„ Upload PDF":
51
+ uploaded_file = st.file_uploader(
52
+ "Upload a PDF file",
53
+ type=["pdf"]
54
+ )
55
+
56
+ if uploaded_file:
57
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
58
+ tmp.write(uploaded_file.read())
59
+ file_path = tmp.name
60
+
61
  text = extract_text_from_pdf(file_path)
62
+
63
+ st.subheader("📃 Extracted Text (Preview)")
64
+ st.text_area("", text[:3000], height=200, disabled=True)
65
 
66
+ elif input_method == "πŸ–Ό Upload Image":
67
+ uploaded_file = st.file_uploader(
68
+ "Upload an image (PNG, JPG, JPEG)",
69
+ type=["png", "jpg", "jpeg"]
70
+ )
71
+
72
+ if uploaded_file:
73
+ with tempfile.NamedTemporaryFile(delete=False) as tmp:
74
+ tmp.write(uploaded_file.read())
75
+ file_path = tmp.name
76
+
77
+ image = Image.open(file_path)
78
+ st.image(image, caption="Uploaded Image", use_container_width=True)
79
+
80
+ with st.spinner("Extracting text from image..."):
81
+ text = extract_text_from_image(image)
82
+
83
+ st.subheader("📃 Extracted Text (Preview)")
84
+ st.text_area("", text[:3000], height=200, disabled=True)
85
 
86
+ # Summarize button
87
+ if text.strip():
88
+ if st.button("🚀 Summarize", type="primary"):
89
  with st.spinner("Summarizing..."):
90
+ # Handle text length for T5 model
91
+ input_text = text[:4000] if len(text) > 4000 else text
92
+
93
  summary = summarizer(
94
+ input_text,
95
+ max_length=150,
96
  min_length=40,
97
  do_sample=False
98
  )[0]["summary_text"]
99
 
100
  st.subheader("📝 Summary")
101
  st.success(summary)
102
+
103
+ # Copy button
104
+ st.download_button(
105
+ label="📋 Download Summary",
106
+ data=summary,
107
+ file_name="summary.txt",
108
+ mime="text/plain"
109
+ )
110
+ else:
111
+ st.info("👆 Please provide some text using one of the methods above.")