Em4e committed on
Commit
acdf80d
·
verified ·
1 Parent(s): 259dab0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -15,10 +15,7 @@ class WebpageContentProcessor:
15
  This class is responsible for the entire content processing pipeline.
16
  """
17
  def __init__(self):
18
- # --- MODIFIED: Removed the converter instantiation from init ---
19
- # The MarkItDown library is instantiated per-conversion.
20
  pass
21
- # -----------------------------------------------------------
22
 
23
  def fetch_and_convert_to_markdown(self, url: str) -> str:
24
  """
@@ -45,11 +42,11 @@ class WebpageContentProcessor:
45
  if not content_container:
46
  return "Error: Could not find the <body> of the webpage."
47
 
48
- # --- MODIFIED: Corrected MarkItDown usage ---
49
- # Instantiate the converter directly with the HTML content.
50
- # The result object's 'text' attribute holds the markdown.
51
- markdown_converter_instance = MarkItDown(str(content_container))
52
- markdown_output = markdown_converter_instance.text
53
  # -----------------------------------------------
54
 
55
  # Post-processing to clean up the resulting Markdown
@@ -326,4 +323,4 @@ with tab2:
326
  st.rerun()
327
 
328
  st.subheader("Final Compiled Document")
329
- st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
 
15
  This class is responsible for the entire content processing pipeline.
16
  """
17
  def __init__(self):
 
 
18
  pass
 
19
 
20
  def fetch_and_convert_to_markdown(self, url: str) -> str:
21
  """
 
42
  if not content_container:
43
  return "Error: Could not find the <body> of the webpage."
44
 
45
+ # --- MODIFIED: Corrected MarkItDown usage for the installed library version ---
46
+ # 1. Instantiate the converter object.
47
+ markdown_converter_instance = MarkItDown()
48
+ # 2. Call the .convert() method with the HTML content.
49
+ markdown_output = markdown_converter_instance.convert(str(content_container))
50
  # -----------------------------------------------
51
 
52
  # Post-processing to clean up the resulting Markdown
 
323
  st.rerun()
324
 
325
  st.subheader("Final Compiled Document")
326
+ st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")