Spaces:

akashmishra358
/

pdfcut

Sleeping

App Files Files Community

akashmishra358 committed on Sep 17, 2025

Commit

c027746

verified ·

1 Parent(s): 7e518ac

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +29 -17

src/streamlit_app.py CHANGED Viewed

@@ -21,14 +21,17 @@ def find_sections(pdf_bytes, marker_pattern):
         # Example pattern: r"^(Question|Q)\s*(\d+)"
         matches = re.finditer(marker_pattern, text, re.IGNORECASE | re.MULTILINE)
         for match in matches:
-            question_num = match.group(2) # Assumes the number is the second group
-            # We use a key like 'Q1', 'Q2' for consistency
-            key = f"Q{question_num}"
             if key not in [item['key'] for item in found_items]:
-                 found_items.append({'key': key, 'page': page_num})
     if not found_items:
         return {}
     # Determine page ranges
     for i, item in enumerate(found_items):
@@ -95,7 +98,7 @@ if st.button("🚀 Process PDFs", disabled=(not all([q_file, r_file, s_file]))):
         s_sections = find_sections(s_bytes, marker_pattern)
         # Get a unique, sorted list of all question keys found (e.g., Q1, Q2, Q10)
-        all_keys = sorted(list(set(q_sections.keys()) | set(r_sections.keys()) | set(s_sections.keys())), key=lambda x: int(x[1:]))
         if not all_keys:
             st.error("Could not find any sections with the provided marker. Please check your PDFs or refine the marker text.")
@@ -127,13 +130,16 @@ if st.button("🚀 Process PDFs", disabled=(not all([q_file, r_file, s_file]))):
                     st.markdown("#### Question")
                     if item['question']:
                         with st.expander("👁️ Preview"):
-                            preview_doc = fitz.open(stream=item['question'], filetype="pdf")
-                            pix = preview_doc[0].get_pixmap()
-                            st.image(pix.tobytes())
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['question'],
-                            file_name=f"{item['key']}_question.pdf",
                             mime="application/pdf"
                         )
                     else:
@@ -143,13 +149,16 @@ if st.button("🚀 Process PDFs", disabled=(not all([q_file, r_file, s_file]))):
                     st.markdown("#### Rubric")
                     if item['rubric']:
                         with st.expander("👁️ Preview"):
-                            preview_doc = fitz.open(stream=item['rubric'], filetype="pdf")
-                            pix = preview_doc[0].get_pixmap()
-                            st.image(pix.tobytes())
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['rubric'],
-                            file_name=f"{item['key']}_rubric.pdf",
                             mime="application/pdf"
                         )
                     else:
@@ -159,13 +168,16 @@ if st.button("🚀 Process PDFs", disabled=(not all([q_file, r_file, s_file]))):
                     st.markdown("#### Solution")
                     if item['solution']:
                         with st.expander("👁️ Preview"):
-                            preview_doc = fitz.open(stream=item['solution'], filetype="pdf")
-                            pix = preview_doc[0].get_pixmap()
-                            st.image(pix.tobytes())
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['solution'],
-                            file_name=f"{item['key']}_solution.pdf",
                             mime="application/pdf"
                         )
                     else:

         # Example pattern: r"^(Question|Q)\s*(\d+)"
         matches = re.finditer(marker_pattern, text, re.IGNORECASE | re.MULTILINE)
         for match in matches:
+            # We use the raw number found for sorting later
+            question_num_str = match.group(1)
+            key = f"Q{question_num_str}"
             if key not in [item['key'] for item in found_items]:
+                 found_items.append({'key': key, 'page': page_num, 'num': int(question_num_str)})
     if not found_items:
         return {}
+    # Sort items numerically to handle Q1, Q2, Q10 correctly
+    found_items.sort(key=lambda x: x['num'])
     # Determine page ranges
     for i, item in enumerate(found_items):
         s_sections = find_sections(s_bytes, marker_pattern)
         # Get a unique, sorted list of all question keys found (e.g., Q1, Q2, Q10)
+        all_keys = sorted(list(set(q_sections.keys()) | set(r_sections.keys()) | set(s_sections.keys())), key=lambda x: int(re.search(r'\d+', x).group()))
         if not all_keys:
             st.error("Could not find any sections with the provided marker. Please check your PDFs or refine the marker text.")
                     st.markdown("#### Question")
                     if item['question']:
                         with st.expander("👁️ Preview"):
+                            try:
+                                preview_doc = fitz.open(stream=item['question'], filetype="pdf")
+                                pix = preview_doc[0].get_pixmap()
+                                st.image(pix.tobytes())
+                            except Exception as e:
+                                st.error(f"Could not generate preview: {e}")
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['question'],
+                            file_name=f"{item['key'].lower().replace(' ','_')}_question.pdf",
                             mime="application/pdf"
                         )
                     else:
                     st.markdown("#### Rubric")
                     if item['rubric']:
                         with st.expander("👁️ Preview"):
+                            try:
+                                preview_doc = fitz.open(stream=item['rubric'], filetype="pdf")
+                                pix = preview_doc[0].get_pixmap()
+                                st.image(pix.tobytes())
+                            except Exception as e:
+                                st.error(f"Could not generate preview: {e}")
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['rubric'],
+                            file_name=f"{item['key'].lower().replace(' ','_')}_rubric.pdf",
                             mime="application/pdf"
                         )
                     else:
                     st.markdown("#### Solution")
                     if item['solution']:
                         with st.expander("👁️ Preview"):
+                            try:
+                                preview_doc = fitz.open(stream=item['solution'], filetype="pdf")
+                                pix = preview_doc[0].get_pixmap()
+                                st.image(pix.tobytes())
+                            except Exception as e:
+                                st.error(f"Could not generate preview: {e}")
                         st.download_button(
                             label="⬇️ Download PDF",
                             data=item['solution'],
+                            file_name=f"{item['key'].lower().replace(' ','_')}_solution.pdf",
                             mime="application/pdf"
                         )
                     else: