Spaces:
Sleeping
Sleeping
Update app.py: replace the preview toggle with a side-by-side editor and live preview
Browse files
app.py
CHANGED
|
@@ -38,10 +38,7 @@ class WebpageContentProcessor:
|
|
| 38 |
content_container = soup.find('body')
|
| 39 |
if not content_container:
|
| 40 |
return "Error: Could not find the <body> of the webpage."
|
| 41 |
-
# --- MODIFIED: Switched to markdownify for conversion ---
|
| 42 |
-
# markdownify is a simple function call.
|
| 43 |
markdown_output = md(str(content_container))
|
| 44 |
-
# -----------------------------------------------
|
| 45 |
# Post-processing to clean up the resulting Markdown
|
| 46 |
markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output)
|
| 47 |
markdown_output = re.sub(r'(\n\s*[\*\-]\s*\n)|(^\s*[\*\-]\s*$)', '\n', markdown_output, flags=re.MULTILINE)
|
|
@@ -51,7 +48,6 @@ class WebpageContentProcessor:
|
|
| 51 |
except requests.exceptions.RequestException as e:
|
| 52 |
return f"Error fetching the URL: {e}. Please check the URL and your connection."
|
| 53 |
except Exception as e:
|
| 54 |
-
# Added more specific error logging for debugging
|
| 55 |
return f"An unexpected error occurred during content processing: {e}"
|
| 56 |
|
| 57 |
def parse_markdown_into_chunks(self, markdown_content: str) -> list:
|
|
@@ -196,9 +192,7 @@ def init_session_state():
|
|
| 196 |
st.session_state.selected_chunk_id = None
|
| 197 |
if 'status_message' not in st.session_state:
|
| 198 |
st.session_state.status_message = ""
|
| 199 |
-
#
|
| 200 |
-
if 'show_preview' not in st.session_state:
|
| 201 |
-
st.session_state.show_preview = False
|
| 202 |
|
| 203 |
init_session_state()
|
| 204 |
|
|
@@ -249,7 +243,6 @@ if st.button("Process URL", use_container_width=True, type="primary"):
|
|
| 249 |
else:
|
| 250 |
st.session_state.status_message = "Could not extract any content chunks."
|
| 251 |
st.session_state.selected_chunk_id = None
|
| 252 |
-
st.session_state.show_preview = False # Ensure preview is off when processing new URL
|
| 253 |
st.rerun()
|
| 254 |
|
| 255 |
if st.session_state.status_message:
|
|
@@ -279,54 +272,45 @@ with tab1:
|
|
| 279 |
|
| 280 |
if selected_id != st.session_state.selected_chunk_id:
|
| 281 |
st.session_state.selected_chunk_id = selected_id
|
| 282 |
-
st.session_state.show_preview = False # NEW: Reset preview when changing chunk
|
| 283 |
st.rerun()
|
| 284 |
|
| 285 |
selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
|
| 286 |
|
| 287 |
if selected_chunk:
|
| 288 |
-
|
| 289 |
-
st.
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
"
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
manager.delete_chunk(selected_chunk['id'])
|
| 309 |
-
st.session_state.status_message = "Chunk deleted."
|
| 310 |
-
remaining_chunks = manager.get_chunks()
|
| 311 |
-
st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
|
| 312 |
-
st.session_state.show_preview = False # Hide preview after delete
|
| 313 |
-
st.rerun()
|
| 314 |
-
|
| 315 |
-
# NEW: Preview button in the third column
|
| 316 |
-
with col3:
|
| 317 |
-
if st.button("Preview Content", use_container_width=True, key=f"preview_{selected_chunk['id']}"):
|
| 318 |
-
# Toggle the preview state
|
| 319 |
-
st.session_state.show_preview = not st.session_state.show_preview
|
| 320 |
st.rerun()
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
st.
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
|
| 332 |
with tab2:
|
|
@@ -348,4 +332,4 @@ with tab2:
|
|
| 348 |
st.rerun()
|
| 349 |
|
| 350 |
st.subheader("Final Compiled Document")
|
| 351 |
-
st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
|
|
|
|
| 38 |
content_container = soup.find('body')
|
| 39 |
if not content_container:
|
| 40 |
return "Error: Could not find the <body> of the webpage."
|
|
|
|
|
|
|
| 41 |
markdown_output = md(str(content_container))
|
|
|
|
| 42 |
# Post-processing to clean up the resulting Markdown
|
| 43 |
markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output)
|
| 44 |
markdown_output = re.sub(r'(\n\s*[\*\-]\s*\n)|(^\s*[\*\-]\s*$)', '\n', markdown_output, flags=re.MULTILINE)
|
|
|
|
| 48 |
except requests.exceptions.RequestException as e:
|
| 49 |
return f"Error fetching the URL: {e}. Please check the URL and your connection."
|
| 50 |
except Exception as e:
|
|
|
|
| 51 |
return f"An unexpected error occurred during content processing: {e}"
|
| 52 |
|
| 53 |
def parse_markdown_into_chunks(self, markdown_content: str) -> list:
|
|
|
|
| 192 |
st.session_state.selected_chunk_id = None
|
| 193 |
if 'status_message' not in st.session_state:
|
| 194 |
st.session_state.status_message = ""
|
| 195 |
+
# NOTE: no 'show_preview' flag is initialised here; the side-by-side layout renders the preview whenever a chunk is selected.
|
|
|
|
|
|
|
| 196 |
|
| 197 |
init_session_state()
|
| 198 |
|
|
|
|
| 243 |
else:
|
| 244 |
st.session_state.status_message = "Could not extract any content chunks."
|
| 245 |
st.session_state.selected_chunk_id = None
|
|
|
|
| 246 |
st.rerun()
|
| 247 |
|
| 248 |
if st.session_state.status_message:
|
|
|
|
| 272 |
|
| 273 |
if selected_id != st.session_state.selected_chunk_id:
|
| 274 |
st.session_state.selected_chunk_id = selected_id
|
|
|
|
| 275 |
st.rerun()
|
| 276 |
|
| 277 |
selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
|
| 278 |
|
| 279 |
if selected_chunk:
|
| 280 |
+
# --- NEW: Side-by-side layout for editor and live preview ---
|
| 281 |
+
editor_col, preview_col = st.columns(2)
|
| 282 |
+
|
| 283 |
+
with editor_col:
|
| 284 |
+
st.markdown(f"**Editing: {selected_chunk['title']}**")
|
| 285 |
+
st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
|
| 286 |
+
|
| 287 |
+
edited_content = st.text_area(
|
| 288 |
+
"Chunk Content (Markdown)",
|
| 289 |
+
value=selected_chunk['content'],
|
| 290 |
+
height=400, # Increased height for better visibility
|
| 291 |
+
key=f"editor_{selected_chunk['id']}"
|
| 292 |
+
)
|
| 293 |
+
|
| 294 |
+
# Action buttons below the editor
|
| 295 |
+
b_col1, b_col2, _ = st.columns([1, 1, 3])
|
| 296 |
+
|
| 297 |
+
if b_col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
|
| 298 |
+
manager.update_chunk_content(selected_chunk['id'], edited_content)
|
| 299 |
+
st.session_state.status_message = "Chunk updated successfully!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
st.rerun()
|
| 301 |
|
| 302 |
+
if b_col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
|
| 303 |
+
manager.delete_chunk(selected_chunk['id'])
|
| 304 |
+
st.session_state.status_message = "Chunk deleted."
|
| 305 |
+
remaining_chunks = manager.get_chunks()
|
| 306 |
+
st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
|
| 307 |
+
st.rerun()
|
| 308 |
+
|
| 309 |
+
with preview_col:
|
| 310 |
+
st.markdown("**Live Preview**")
|
| 311 |
+
# Renders the editor's current value on every script rerun; Streamlit reruns when the text area commits a change (e.g. on blur or Ctrl+Enter), not on each keystroke.
|
| 312 |
+
with st.container(height=525, border=True):
|
| 313 |
+
st.markdown(edited_content, unsafe_allow_html=True)
|
| 314 |
|
| 315 |
|
| 316 |
with tab2:
|
|
|
|
| 332 |
st.rerun()
|
| 333 |
|
| 334 |
st.subheader("Final Compiled Document")
|
| 335 |
+
st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
|