Spaces:

Em4e
/

chunk-based-text-editor

Sleeping

App Files Files Community

Em4e committed on Jun 9, 2025

Commit

5543eef

verified ·

1 Parent(s): c063934

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -127

app.py CHANGED Viewed

@@ -5,21 +5,22 @@ from html_to_markdown import convert_to_markdown
 import re
 from llama_index.core.node_parser import MarkdownNodeParser
 from llama_index.core.schema import Document, MetadataMode
-import textstat # For readability metrics
 class WebpageContentProcessor:
     """
     Handles fetching, converting, and parsing webpage content into structured chunks.
-    Adheres to the Single Responsibility Principle (SRP) for content processing.
     """
     def __init__(self):
         pass
     def fetch_and_convert_to_markdown(self, url: str) -> str:
         """
-        Fetches HTML content from a given URL, attempts to isolate the main content,
-        removes common boilerplate, and converts to Markdown.
-        Prioritizes semantic content tags over H1-based identification for robust extraction.
         """
         try:
             headers = {
@@ -30,107 +31,87 @@ class WebpageContentProcessor:
             html_content = response.text
             soup = BeautifulSoup(html_content, 'html.parser')
-            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                 for element in soup.find_all(tag_name):
                     element.decompose()
             content_for_conversion = soup.find('article') or soup.find('main') or \
-                                     soup.find('div', class_='main-content') or \
                                      soup.find('div', {'role': 'main'})
             if not content_for_conversion:
-                first_h1 = soup.find('h1')
-                if first_h1:
-                    candidate_container = first_h1.parent
-                    for _ in range(5):
-                        if candidate_container is None: break
-                        if candidate_container.name in ['article', 'main', 'section', 'div']:
-                            content_for_conversion = candidate_container
-                            break
-                        candidate_container = candidate_container.parent
-                    if not content_for_conversion:
-                         content_for_conversion = first_h1.find_parent()
-                else:
-                    content_for_conversion = soup.body
-            if not content_for_conversion:
-                return "Error: Could not identify main content for conversion."
-            unwanted_selectors = [
-                'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
-                'textarea', 'svg', 'figure', 'figcaption',
-                '.social-share', '.comments', '.related-posts', '.pagination',
-                '.breadcrumbs', '.cookie-consent', '[role="navigation"]',
-                '[role="banner"]', '[role="contentinfo"]', '[class*="ad"]', '[id*="ad"]'
-            ]
-            for selector in unwanted_selectors:
-                for element in content_for_conversion.select(selector):
-                    element.decompose()
             markdown_output = convert_to_markdown(str(content_for_conversion))
-            markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output)
-            markdown_output = markdown_output.strip()
             return markdown_output
         except requests.exceptions.Timeout:
-            return "Error: Request timed out. The server took too long to respond."
         except requests.exceptions.RequestException as e:
-            return f"Error fetching URL: {e}."
         except Exception as e:
-            return f"An unexpected error occurred: {e}"
     def parse_markdown_into_chunks(self, markdown_content: str) -> list:
         """
-        Parses Markdown content into LlamaIndex nodes (chunks) and extracts title and content.
-        This version uses MarkdownNodeParser to leverage the document's structure.
         """
         if not markdown_content or "Error" in markdown_content:
             return []
-        doc = Document(text=markdown_content, metadata={"filename": "webpage_content"})
         parser = MarkdownNodeParser(include_metadata=True)
         nodes = parser.get_nodes_from_documents([doc])
         structured_chunks = []
         for i, node in enumerate(nodes):
-            pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
-            if not pure_text_content:
                 continue
-            heading_title = ""
-            content_text = pure_text_content
-            # Attempt to find a title from a markdown header
-            heading_match = re.match(r"^(#+)\s*(.*)", pure_text_content)
-            if heading_match:
-                heading_title = heading_match.group(2).strip()
-                # Remove the title from the content itself
-                content_text = pure_text_content[len(heading_match.group(0)):].strip()
-                if not heading_title:
-                    heading_title = "[Untitled Section]"
             else:
-                # Fallback to using the first line as the title
-                first_line = content_text.split('\n')[0].strip()
-                heading_title = (first_line[:75] + "...") if len(first_line) > 75 else first_line
-                if not heading_title:
-                    heading_title = "[Empty Section]"
             structured_chunks.append({
                 "id": i,
-                "title": heading_title,
                 "content": content_text
             })
         return structured_chunks
 class ChunkManager:
     def __init__(self):
         self._chunks = []
         self.target_flesch_min = 60
-        self.target_grade_max = 8
-        self.target_min_chunk_words = 50
-        self.target_max_chunk_words = 500
     def set_chunks(self, chunks: list):
         self._chunks = [self._add_stats_to_chunk(chunk) for chunk in chunks]
@@ -143,39 +124,45 @@ class ChunkManager:
         return chunk
     def _calculate_chunk_stats(self, text: str) -> dict:
         stats = {}
         try:
             stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
             stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
             stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
-        except Exception:
             stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
         return stats
     def format_chunk_stats(self, stats: dict) -> str:
         flesch_color = "green" if stats.get('flesch_reading_ease', 0) >= self.target_flesch_min else "red"
         grade_color = "green" if stats.get('flesch_kincaid_grade', 0) <= self.target_grade_max else "red"
         word_color = "green" if self.target_min_chunk_words <= stats.get('word_count', 0) <= self.target_max_chunk_words else "red"
         return (
-            f"**Word Count:** <span style='color:{word_color}'>{stats.get('word_count', 0)}</span> | "
-            f"**Reading Ease:** <span style='color:{flesch_color}'>{stats.get('flesch_reading_ease', 0):.2f}</span> | "
-            f"**Grade Level:** <span style='color:{grade_color}'>{stats.get('flesch_kincaid_grade', 0):.2f}</span>"
         )
     def get_document_summary_stats(self) -> str:
         if not self._chunks:
             return "No document loaded."
         total_words = sum(c['stats']['word_count'] for c in self._chunks)
-        avg_ease = sum(c['stats']['flesch_reading_ease'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
-        avg_grade = sum(c['stats']['flesch_kincaid_grade'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
         return (
-            f"**Total Chunks:** {len(self._chunks)} | "
-            f"**Total Words:** {total_words} | "
-            f"**Avg. Reading Ease:** {avg_ease:.2f} | "
-            f"**Avg. Grade Level:** {avg_grade:.2f}"
         )
     def get_chunk_by_id(self, chunk_id: int) -> dict | None:
@@ -185,13 +172,7 @@ class ChunkManager:
         chunk = self.get_chunk_by_id(chunk_id)
         if chunk:
             chunk["content"] = new_content
-            self._add_stats_to_chunk(chunk)
-            # Optionally update title if it's derived from content
-            if chunk["title"].startswith("[") or not re.match(r"^(#+)\s*(.*)", chunk["content"]):
-                 first_line = new_content.split('\n')[0].strip()
-                 chunk["title"] = (first_line[:75] + '...') if len(first_line) > 75 else first_line
-                 if not chunk["title"]: chunk["title"] = "[Empty Section]"
     def delete_chunk(self, chunk_id: int):
         self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
@@ -202,53 +183,61 @@ class ChunkManager:
     def get_final_markdown(self) -> str:
         if not self._chunks:
             return "No content to display."
-        return "\n\n".join(f"# {c['title']}\n{c['content']}" for c in self._chunks)
     def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
         self.target_flesch_min = flesch_min
         self.target_grade_max = grade_max
         self.target_min_chunk_words = min_words
         self.target_max_chunk_words = max_words
-        # Recalculate stats for all chunks with new targets
-        self.set_chunks(self.get_chunks())
-# --- Streamlit UI ---
 st.set_page_config(layout="wide", page_title="Webpage Content Editor")
-# Initialize session state variables if they don't exist
 def init_session_state():
-    if 'chunk_manager' not in st.session_state:
-        st.session_state.chunk_manager = ChunkManager()
-    if 'content_processor' not in st.session_state:
-        st.session_state.content_processor = WebpageContentProcessor()
     if 'selected_chunk_id' not in st.session_state:
         st.session_state.selected_chunk_id = None
     if 'status_message' not in st.session_state:
         st.session_state.status_message = ""
-    if 'url_input' not in st.session_state:
-        st.session_state.url_input = ""
 init_session_state()
-processor = st.session_state.content_processor
-manager = st.session_state.chunk_manager
 st.title("✨ Webpage Content Editor")
-st.caption("Created by [Emilija Gjorgjevska](https://www.linkedin.com/in/emilijagjorgjevska/) | Inspired by Andrea Volpini's work on content chunking.")
 st.info(
-    "**Note:** Some URLs may be blocked due to server policies (like bot detection). "
-    "This is an early version, so expect a few bugs!",
     icon="ℹ️"
 )
-url_input = st.text_input("Enter a webpage URL to begin", value=st.session_state.url_input, key="url_input_widget")
-if st.button("Process URL", use_container_width=True):
-    st.session_state.url_input = st.session_state.url_input_widget
-    if st.session_state.url_input:
-        with st.spinner("Fetching and processing content..."):
-            markdown = processor.fetch_and_convert_to_markdown(st.session_state.url_input)
             if "Error" in markdown:
                 st.session_state.status_message = markdown
                 manager.set_chunks([])
@@ -260,38 +249,40 @@ if st.button("Process URL", use_container_width=True):
                     st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
                     st.session_state.selected_chunk_id = chunks[0]['id']
                 else:
-                    st.session_state.status_message = "Could not extract content chunks."
                     st.session_state.selected_chunk_id = None
-            st.rerun()
 if st.session_state.status_message:
     st.toast(st.session_state.status_message)
-    st.session_state.status_message = ""
 tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])
 with tab1:
     chunks = manager.get_chunks()
     if not chunks:
-        st.write("Process a URL to start editing chunks.")
     else:
         chunk_ids = [c['id'] for c in chunks]
-        # Ensure selected_chunk_id is valid
         if st.session_state.selected_chunk_id not in chunk_ids:
             st.session_state.selected_chunk_id = chunk_ids[0] if chunk_ids else None
         if st.session_state.selected_chunk_id is not None:
-            chunk_options = {c['id']: f"Chunk {c['id']}: {c['title']}" for c in chunks}
-            # The selectbox's state is now managed directly by st.session_state.selected_chunk_id
             selected_id = st.selectbox(
                 "Select a chunk to edit",
                 options=chunk_ids,
-                format_func=lambda x: chunk_options.get(x, "Invalid Chunk"),
                 index=chunk_ids.index(st.session_state.selected_chunk_id)
             )
-            # Update the session state ONLY if the user selection has changed
             if selected_id != st.session_state.selected_chunk_id:
                 st.session_state.selected_chunk_id = selected_id
                 st.rerun()
@@ -299,24 +290,28 @@ with tab1:
             selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
             if selected_chunk:
                 st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
                 edited_content = st.text_area(
                     "Chunk Content",
                     value=selected_chunk['content'],
-                    height=300,
-                    key=f"editor_{selected_chunk['id']}" # Unique key forces widget to re-render
                 )
-                col1, col2, _ = st.columns([1, 1, 4])
                 if col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
                     manager.update_chunk_content(selected_chunk['id'], edited_content)
-                    st.session_state.status_message = "Chunk updated!"
                     st.rerun()
                 if col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
                     manager.delete_chunk(selected_chunk['id'])
-                    st.session_state.status_message = "Chunk deleted!"
                     remaining_chunks = manager.get_chunks()
                     st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
                     st.rerun()
@@ -327,6 +322,7 @@ with tab2:
     st.subheader("Content Targets")
     with st.form("targets_form"):
         c1, c2 = st.columns(2)
         f_min = c1.number_input("Min Flesch Reading Ease", value=float(manager.target_flesch_min))
         g_max = c2.number_input("Max Flesch-Kincaid Grade", value=float(manager.target_grade_max))
@@ -335,8 +331,8 @@ with tab2:
         if st.form_submit_button("Set New Targets", use_container_width=True):
             manager.set_targets(f_min, g_max, w_min, w_max)
-            st.session_state.status_message = "Targets updated."
             st.rerun()
-    st.subheader("Final Document")
-    st.text_area("Compiled Markdown", manager.get_final_markdown(), height=400, disabled=False, key="final_markdown")

 import re
 from llama_index.core.node_parser import MarkdownNodeParser
 from llama_index.core.schema import Document, MetadataMode
+import textstat
+# --- Core Logic Classes ---
 class WebpageContentProcessor:
     """
     Handles fetching, converting, and parsing webpage content into structured chunks.
+    This class is responsible for the entire content processing pipeline.
     """
     def __init__(self):
         """Create a processor; no instance state is set up here."""
         pass
     def fetch_and_convert_to_markdown(self, url: str) -> str:
         """
+        Fetches HTML content from a URL, cleans it, and converts it to Markdown.
+        It intelligently tries to find the main content block of the page.
         """
         try:
             headers = {
             html_content = response.text
             soup = BeautifulSoup(html_content, 'html.parser')
+            # Remove non-content tags like scripts and styles
+            for tag_name in ['script', 'style', 'noscript', 'meta', 'link', 'header', 'footer', 'nav', 'aside']:
                 for element in soup.find_all(tag_name):
                     element.decompose()
+            # Find the main content area of the webpage
             content_for_conversion = soup.find('article') or soup.find('main') or \
+                                     soup.find('div', class_=re.compile(r'content|post|body')) or \
                                      soup.find('div', {'role': 'main'})
+            # Fallback to the entire body if no main content is found
             if not content_for_conversion:
+                content_for_conversion = soup.body
+                if not content_for_conversion:
+                    return "Error: Could not find any content on the page."
+            # Convert the cleaned HTML to Markdown
             markdown_output = convert_to_markdown(str(content_for_conversion))
+            # Clean up excessive newlines
+            markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output).strip()
             return markdown_output
         except requests.exceptions.Timeout:
+            return "Error: The request timed out. The server is taking too long to respond."
         except requests.exceptions.RequestException as e:
+            return f"Error fetching the URL: {e}. Please check the URL and your connection."
         except Exception as e:
+            return f"An unexpected error occurred during content processing: {e}"
     def parse_markdown_into_chunks(self, markdown_content: str) -> list:
         """
         Split Markdown into titled chunks based on its heading structure.

         Args:
             markdown_content: Markdown text, or one of the error strings
                 returned by fetch_and_convert_to_markdown.

         Returns:
             A list of dicts with the keys "id" (index of the source node),
             "title" and "content". The list is empty when the input is empty
             or is an error message.
         """
         # Failures are reported in-band as strings with a known prefix. Match
         # the prefix only: a substring test for "Error" would throw away any
         # real page that merely mentions the word, and would not recognise the
         # lowercase "An unexpected error occurred..." message.
         error_prefixes = ("Error:", "Error fetching", "An unexpected error occurred")
         if not markdown_content or markdown_content.startswith(error_prefixes):
             return []

         # MarkdownNodeParser splits the document along its Markdown structure.
         parser = MarkdownNodeParser(include_metadata=True)
         doc = Document(text=markdown_content)
         nodes = parser.get_nodes_from_documents([doc])

         structured_chunks = []
         for i, node in enumerate(nodes):
             content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
             if not content:
                 continue

             # A leading Markdown heading becomes the title and is removed
             # from the chunk body.
             title_match = re.match(r"^(#+)\s*(.*)", content)
             if title_match:
                 title = title_match.group(2).strip()
                 content_text = content[title_match.end():].strip()
             else:
                 # No heading: fall back to the (possibly truncated) first
                 # line, and keep the body untouched.
                 first_line = content.split('\n')[0].strip()
                 title = (first_line[:75] + '...') if len(first_line) > 75 else first_line
                 content_text = content

             # Guarantee a non-empty title (e.g. a bare "#" heading).
             if not title:
                 title = f"[Chunk {i+1}]"

             structured_chunks.append({
                 "id": i,
                 "title": title,
                 "content": content_text
             })
         return structured_chunks
 class ChunkManager:
+    """
+    Manages the state of chunks, including their content, statistics, and targets.
+    """
     def __init__(self):
         # No chunks are loaded initially.
         self._chunks = []
         # Default targets; format_chunk_stats compares each chunk's stats
         # against these to pick green/red colouring.
         self.target_flesch_min = 60
+        self.target_grade_max = 9
+        self.target_min_chunk_words = 40
+        self.target_max_chunk_words = 600
     def set_chunks(self, chunks: list):
         self._chunks = [self._add_stats_to_chunk(chunk) for chunk in chunks]
         return chunk
     def _calculate_chunk_stats(self, text: str) -> dict:
+        """Calculates readability and other metrics for a text chunk."""
         stats = {}
         try:
             stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
             stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
             stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
+        except (Exception, TypeError): # Catch potential errors from textstat
             stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
         return stats
     def format_chunk_stats(self, stats: dict) -> str:
+        """Creates a formatted string of stats with color-coding based on targets."""
         flesch_color = "green" if stats.get('flesch_reading_ease', 0) >= self.target_flesch_min else "red"
         grade_color = "green" if stats.get('flesch_kincaid_grade', 0) <= self.target_grade_max else "red"
         word_color = "green" if self.target_min_chunk_words <= stats.get('word_count', 0) <= self.target_max_chunk_words else "red"
         return (
+            f"**Word Count:** <span style='color:{word_color};'>{stats.get('word_count', 0)}</span> &nbsp;&nbsp;|&nbsp;&nbsp; "
+            f"**Reading Ease:** <span style='color:{flesch_color};'>{stats.get('flesch_reading_ease', 0):.2f}</span> &nbsp;&nbsp;|&nbsp;&nbsp; "
+            f"**Grade Level:** <span style='color:{grade_color};'>{stats.get('flesch_kincaid_grade', 0):.2f}</span>"
         )
     def get_document_summary_stats(self) -> str:
+        """Calculates and formats stats for the entire document."""
         if not self._chunks:
             return "No document loaded."
         total_words = sum(c['stats']['word_count'] for c in self._chunks)
+        if len(self._chunks) > 0:
+            avg_ease = sum(c['stats']['flesch_reading_ease'] for c in self._chunks) / len(self._chunks)
+            avg_grade = sum(c['stats']['flesch_kincaid_grade'] for c in self._chunks) / len(self._chunks)
+        else:
+            avg_ease = avg_grade = 0
         return (
+            f"- **Total Chunks:** {len(self._chunks)}\n"
+            f"- **Total Words:** {total_words}\n"
+            f"- **Avg. Reading Ease:** {avg_ease:.2f}\n"
+            f"- **Avg. Grade Level:** {avg_grade:.2f}"
         )
     def get_chunk_by_id(self, chunk_id: int) -> dict | None:
         chunk = self.get_chunk_by_id(chunk_id)
         if chunk:
             chunk["content"] = new_content
+            self._add_stats_to_chunk(chunk) # Recalculate stats after update
     def delete_chunk(self, chunk_id: int):
         self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
     def get_final_markdown(self) -> str:
         if not self._chunks:
             return "No content to display."
+        # Compile final document, adding headers back for chunks that have them
+        final_doc_parts = []
+        for c in self._chunks:
+            title_is_header = re.match(r"^(#+)\s*(.*)", c['title']) is None
+            if not c['title'].startswith("[") and not title_is_header:
+                final_doc_parts.append(f"## {c['title']}\n\n{c['content']}")
+            else:
+                final_doc_parts.append(c['content'])
+        return "\n\n---\n\n".join(final_doc_parts)
     def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
         """Store new readability/size targets, then pass the current chunks back through set_chunks."""
         new_targets = {
             'target_flesch_min': flesch_min,
             'target_grade_max': grade_max,
             'target_min_chunk_words': min_words,
             'target_max_chunk_words': max_words,
         }
         for attr_name, value in new_targets.items():
             setattr(self, attr_name, value)

         # Re-run the stats pass over the existing chunks.
         current_chunks = self.get_chunks()
         self.set_chunks(current_chunks)
+# --- Streamlit UI Application ---
 st.set_page_config(layout="wide", page_title="Webpage Content Editor")
+# Initialize session state for managers and UI state
 def init_session_state():
     """Populate st.session_state with any missing managers and UI defaults.

     Existing entries are left untouched, so state survives Streamlit reruns.
     """
     # Factories are only called for keys that are absent, so the processor
     # and manager are not rebuilt on every rerun.
     default_factories = {
         'processor': WebpageContentProcessor,
         'manager': ChunkManager,
         'selected_chunk_id': lambda: None,
         'status_message': lambda: "",
     }
     for key, make_default in default_factories.items():
         if key not in st.session_state:
             st.session_state[key] = make_default()
 init_session_state()
+processor = st.session_state.processor
+manager = st.session_state.manager
+# --- Page Layout ---
 st.title("✨ Webpage Content Editor")
+st.caption("A tool to fetch, chunk, and refine web content.")
 st.info(
+    "**Note:** Some websites may block content scraping. This is an early version, so you might encounter bugs.",
     icon="ℹ️"
 )
+# URL input and processing button
+url_input = st.text_input("Enter a webpage URL to start", key="url_input")
+if st.button("Process URL", use_container_width=True, type="primary"):
+    if url_input:
+        with st.spinner("Fetching and chunking content..."):
+            markdown = processor.fetch_and_convert_to_markdown(url_input)
             if "Error" in markdown:
                 st.session_state.status_message = markdown
                 manager.set_chunks([])
                     st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
                     st.session_state.selected_chunk_id = chunks[0]['id']
                 else:
+                    st.session_state.status_message = "Could not extract any content chunks."
                     st.session_state.selected_chunk_id = None
+            st.rerun() # Rerun to update the UI with new state
+# Display status messages as toasts
 if st.session_state.status_message:
     st.toast(st.session_state.status_message)
+    st.session_state.status_message = "" # Clear after displaying
+# Main UI with tabs
 tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])
 with tab1:
     chunks = manager.get_chunks()
     if not chunks:
+        st.write("Process a URL to begin editing content chunks.")
     else:
         chunk_ids = [c['id'] for c in chunks]
+        # Ensure the selected chunk ID is valid
         if st.session_state.selected_chunk_id not in chunk_ids:
             st.session_state.selected_chunk_id = chunk_ids[0] if chunk_ids else None
         if st.session_state.selected_chunk_id is not None:
+            chunk_options = {c['id']: c['title'] for c in chunks}
+            # Dropdown to select a chunk for editing
             selected_id = st.selectbox(
                 "Select a chunk to edit",
                 options=chunk_ids,
+                format_func=lambda x: f"Chunk {x}: {chunk_options.get(x, 'N/A')}",
                 index=chunk_ids.index(st.session_state.selected_chunk_id)
             )
+            # Update state if the selection changes
             if selected_id != st.session_state.selected_chunk_id:
                 st.session_state.selected_chunk_id = selected_id
                 st.rerun()
             selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
             if selected_chunk:
+                st.markdown(f"**Editing: {selected_chunk['title']}**")
                 st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
+                # Text area for editing the selected chunk's content
                 edited_content = st.text_area(
                     "Chunk Content",
                     value=selected_chunk['content'],
+                    height=350,
+                    key=f"editor_{selected_chunk['id']}" # Unique key ensures the widget updates
                 )
+                # Action buttons for the selected chunk
+                col1, col2, _ = st.columns([1, 1, 5])
                 if col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
                     manager.update_chunk_content(selected_chunk['id'], edited_content)
+                    st.session_state.status_message = "Chunk updated successfully!"
                     st.rerun()
                 if col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
                     manager.delete_chunk(selected_chunk['id'])
+                    st.session_state.status_message = "Chunk deleted."
+                    # Select the next available chunk or reset
                     remaining_chunks = manager.get_chunks()
                     st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
                     st.rerun()
     st.subheader("Content Targets")
     with st.form("targets_form"):
+        st.write("Set readability targets to guide your editing. See color feedback in the editor.")
         c1, c2 = st.columns(2)
         f_min = c1.number_input("Min Flesch Reading Ease", value=float(manager.target_flesch_min))
         g_max = c2.number_input("Max Flesch-Kincaid Grade", value=float(manager.target_grade_max))
         if st.form_submit_button("Set New Targets", use_container_width=True):
             manager.set_targets(f_min, g_max, w_min, w_max)
+            st.session_state.status_message = "Content targets have been updated."
             st.rerun()
+    st.subheader("Final Compiled Document")
+    st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")