Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,10 +15,7 @@ class WebpageContentProcessor:
|
|
| 15 |
This class is responsible for the entire content processing pipeline.
|
| 16 |
"""
|
| 17 |
def __init__(self):
|
| 18 |
-
# --- MODIFIED: Removed the converter instantiation from init ---
|
| 19 |
-
# The MarkItDown library is instantiated per-conversion.
|
| 20 |
pass
|
| 21 |
-
# -----------------------------------------------------------
|
| 22 |
|
| 23 |
def fetch_and_convert_to_markdown(self, url: str) -> str:
|
| 24 |
"""
|
|
@@ -45,11 +42,11 @@ class WebpageContentProcessor:
|
|
| 45 |
if not content_container:
|
| 46 |
return "Error: Could not find the <body> of the webpage."
|
| 47 |
|
| 48 |
-
# --- MODIFIED: Corrected MarkItDown usage ---
|
| 49 |
-
# Instantiate the converter
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
markdown_output = markdown_converter_instance.
|
| 53 |
# -----------------------------------------------
|
| 54 |
|
| 55 |
# Post-processing to clean up the resulting Markdown
|
|
@@ -326,4 +323,4 @@ with tab2:
|
|
| 326 |
st.rerun()
|
| 327 |
|
| 328 |
st.subheader("Final Compiled Document")
|
| 329 |
-
st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
|
|
|
|
| 15 |
This class is responsible for the entire content processing pipeline.
|
| 16 |
"""
|
| 17 |
def __init__(self):
|
|
|
|
|
|
|
| 18 |
pass
|
|
|
|
| 19 |
|
| 20 |
def fetch_and_convert_to_markdown(self, url: str) -> str:
|
| 21 |
"""
|
|
|
|
| 42 |
if not content_container:
|
| 43 |
return "Error: Could not find the <body> of the webpage."
|
| 44 |
|
| 45 |
+
# --- MODIFIED: Corrected MarkItDown usage for the installed library version ---
|
| 46 |
+
# 1. Instantiate the converter object.
|
| 47 |
+
markdown_converter_instance = MarkItDown()
|
| 48 |
+
# 2. Call the .convert() method with the HTML content.
|
| 49 |
+
markdown_output = markdown_converter_instance.convert(str(content_container))
|
| 50 |
# -----------------------------------------------
|
| 51 |
|
| 52 |
# Post-processing to clean up the resulting Markdown
|
|
|
|
| 323 |
st.rerun()
|
| 324 |
|
| 325 |
st.subheader("Final Compiled Document")
|
| 326 |
+
st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
|