Spaces:

Em4e
/

chunk-based-text-editor

Sleeping

App Files Files Community

Em4e committed on Jun 9

Commit

92ff4ac

verified ·

1 Parent(s): c7506fd

Update app.py

Browse files

Files changed (1) hide show

app.py +293 -296

app.py CHANGED Viewed

@@ -1,297 +1,294 @@
-import streamlit as st
-import requests
-from bs4 import BeautifulSoup
-from html_to_markdown import convert_to_markdown
-import re
-from llama_index.core.node_parser import MarkdownNodeParser
-from llama_index.core.schema import Document, MetadataMode
-import textstat # For readability metrics
-class WebpageContentProcessor:
-    """
-    Handles fetching, converting, and parsing webpage content into structured chunks.
-    Adheres to the Single Responsibility Principle (SRP) for content processing.
-    """
-    def __init__(self):
-        pass
-    def fetch_and_convert_to_markdown(self, url: str) -> str:
-        """
-        Fetches HTML content from a given URL, attempts to isolate the main content,
-        removes common boilerplate, and converts to Markdown.
-        Prioritizes semantic content tags over H1-based identification for robust extraction.
-        """
-        try:
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
-            response = requests.get(url, headers=headers, timeout=15)
-            response.raise_for_status()
-            html_content = response.text
-            soup = BeautifulSoup(html_content, 'html.parser')
-            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
-                for element in soup.find_all(tag_name):
-                    element.decompose()
-            content_for_conversion = soup.find('article') or soup.find('main') or \
-                                     soup.find('div', class_='main-content') or \
-                                     soup.find('div', {'role': 'main'})
-            if not content_for_conversion:
-                first_h1 = soup.find('h1')
-                if first_h1:
-                    candidate_container = first_h1.parent
-                    for _ in range(5):
-                        if candidate_container is None: break
-                        if candidate_container.name in ['article', 'main', 'section', 'div']:
-                            content_for_conversion = candidate_container
-                            break
-                        candidate_container = candidate_container.parent
-                    if not content_for_conversion:
-                         content_for_conversion = first_h1.find_parent()
-                else:
-                    content_for_conversion = soup.body
-            if not content_for_conversion:
-                return "Error: Could not identify main content for conversion."
-            unwanted_selectors = [
-                'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
-                'textarea', 'svg', 'figure', 'figcaption',
-                '.social-share', '.comments', '.related-posts', '.pagination',
-                '.breadcrumbs', '.cookie-consent', '[role="navigation"]',
-                '[role="banner"]', '[role="contentinfo"]', '[class*="ad"]', '[id*="ad"]'
-            ]
-            for selector in unwanted_selectors:
-                for element in content_for_conversion.select(selector):
-                    element.decompose()
-            markdown_output = convert_to_markdown(str(content_for_conversion))
-            markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output)
-            markdown_output = markdown_output.strip()
-            return markdown_output
-        except requests.exceptions.Timeout:
-            return "Error: Request timed out. The server took too long to respond."
-        except requests.exceptions.RequestException as e:
-            return f"Error fetching URL: {e}."
-        except Exception as e:
-            return f"An unexpected error occurred: {e}"
-    def parse_markdown_into_chunks(self, markdown_content: str) -> list:
-        if not markdown_content or "Error" in markdown_content:
-            return []
-        doc = Document(text=markdown_content)
-        parser = MarkdownNodeParser(include_metadata=True)
-        nodes = parser.get_nodes_from_documents([doc])
-        structured_chunks = []
-        for i, node in enumerate(nodes):
-            content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
-            title_match = re.match(r"^(#+)\s*(.*)", content)
-            title = title_match.group(2).strip() if title_match else (content.split('\n')[0][:70] + "...")
-            structured_chunks.append({"id": i, "title": title, "content": content})
-        return structured_chunks
-class ChunkManager:
-    def __init__(self):
-        self._chunks = []
-        self.target_flesch_min = 60
-        self.target_grade_max = 8
-        self.target_min_chunk_words = 50
-        self.target_max_chunk_words = 500
-    def set_chunks(self, chunks: list):
-        self._chunks = [self._add_stats_to_chunk(chunk) for chunk in chunks]
-    def get_chunks(self) -> list:
-        return self._chunks
-    def _add_stats_to_chunk(self, chunk: dict) -> dict:
-        chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
-        return chunk
-    def _calculate_chunk_stats(self, text: str) -> dict:
-        stats = {}
-        try:
-            stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
-            stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
-            stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
-        except Exception:
-            stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
-        return stats
-    def format_chunk_stats(self, stats: dict) -> str:
-        flesch_color = "green" if stats.get('flesch_reading_ease', 0) >= self.target_flesch_min else "red"
-        grade_color = "green" if stats.get('flesch_kincaid_grade', 0) <= self.target_grade_max else "red"
-        word_color = "green" if self.target_min_chunk_words <= stats.get('word_count', 0) <= self.target_max_chunk_words else "red"
-        return (
-            f"**Word Count:** <span style='color:{word_color}'>{stats.get('word_count', 0)}</span> | "
-            f"**Reading Ease:** <span style='color:{flesch_color}'>{stats.get('flesch_reading_ease', 0):.2f}</span> | "
-            f"**Grade Level:** <span style='color:{grade_color}'>{stats.get('flesch_kincaid_grade', 0):.2f}</span>"
-        )
-    def get_document_summary_stats(self) -> str:
-        if not self._chunks:
-            return "No document loaded."
-        total_words = sum(c['stats']['word_count'] for c in self._chunks)
-        avg_ease = sum(c['stats']['flesch_reading_ease'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
-        avg_grade = sum(c['stats']['flesch_kincaid_grade'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
-        return (
-            f"**Total Chunks:** {len(self._chunks)} | "
-            f"**Total Words:** {total_words} | "
-            f"**Avg. Reading Ease:** {avg_ease:.2f} | "
-            f"**Avg. Grade Level:** {avg_grade:.2f}"
-        )
-    def get_chunk_by_id(self, chunk_id: int) -> dict | None:
-        return next((c for c in self._chunks if c["id"] == chunk_id), None)
-    def update_chunk_content(self, chunk_id: int, new_content: str):
-        chunk = self.get_chunk_by_id(chunk_id)
-        if chunk:
-            chunk["content"] = new_content
-            self._add_stats_to_chunk(chunk)
-    def delete_chunk(self, chunk_id: int):
-        self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
-        for i, chunk in enumerate(self._chunks):
-            chunk['id'] = i
-    def get_final_markdown(self) -> str:
-        if not self._chunks:
-            return "No content to display."
-        return "\n\n".join(f"# {c['title']}\n{c['content']}" for c in self._chunks)
-    def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
-        self.target_flesch_min = flesch_min
-        self.target_grade_max = grade_max
-        self.target_min_chunk_words = min_words
-        self.target_max_chunk_words = max_words
-        self.set_chunks(self.get_chunks()) # Recalculate stats with new targets
-st.set_page_config(layout="wide", page_title="Webpage Content Editor")
-# Initialize session state variables
-if 'chunk_manager' not in st.session_state:
-    st.session_state.chunk_manager = ChunkManager()
-if 'content_processor' not in st.session_state:
-    st.session_state.content_processor = WebpageContentProcessor()
-if 'selected_chunk_id' not in st.session_state:
-    st.session_state.selected_chunk_id = None
-if 'status_message' not in st.session_state:
-    st.session_state.status_message = ""
-processor = st.session_state.content_processor
-manager = st.session_state.chunk_manager
-st.title("✨ Webpage Content Editor")
-st.caption("Created by [Emilija Gjorgjevska](https://www.linkedin.com/in/emilijagjorgjevska/) | Inspired by Andrea Volpini's work on content chunking.")
-st.info(
-    "**Note:** Some URLs may be blocked due to server policies (like bot detection). "
-    "This is an early version, so expect a few bugs!",
-    icon="ℹ️"
-)
-url_input = st.text_input("Enter a webpage URL to begin", key="url_input")
-if st.button("Process URL", use_container_width=True):
-    if url_input:
-        with st.spinner("Fetching and processing content..."):
-            markdown = processor.fetch_and_convert_to_markdown(url_input)
-            if "Error" in markdown:
-                st.session_state.status_message = markdown
-                manager.set_chunks([])
-            else:
-                chunks = processor.parse_markdown_into_chunks(markdown)
-                manager.set_chunks(chunks)
-                st.session_state.status_message = f"Successfully processed {len(chunks)} chunks." if chunks else "Could not extract content chunks."
-            if manager.get_chunks():
-                st.session_state.selected_chunk_id = manager.get_chunks()[0]['id']
-            else:
-                st.session_state.selected_chunk_id = None
-            st.rerun()
-if st.session_state.status_message:
-    st.toast(st.session_state.status_message)
-    st.session_state.status_message = "" # Clear message after showing
-tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])
-with tab1:
-    chunks = manager.get_chunks()
-    if not chunks:
-        st.write("Process a URL to start editing chunks.")
-    else:
-        # Ensure selected_chunk_id is valid
-        if st.session_state.selected_chunk_id not in [c['id'] for c in chunks]:
-            st.session_state.selected_chunk_id = chunks[0]['id'] if chunks else None
-        if st.session_state.selected_chunk_id is not None:
-            chunk_options = {c['id']: f"Chunk {c['id']}: {c['title']}" for c in chunks}
-            def on_select_change():
-                # Callback to update the selected ID in session state
-                st.session_state.selected_chunk_id = st.session_state.chunk_selector
-            selected_id_from_widget = st.selectbox(
-                "Select a chunk to edit",
-                options=list(chunk_options.keys()),
-                format_func=lambda x: chunk_options[x],
-                key="chunk_selector",
-                on_change=on_select_change,
-                index=list(chunk_options.keys()).index(st.session_state.selected_chunk_id)
-            )
-            selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
-            if selected_chunk:
-                st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
-                edited_content = st.text_area(
-                    "Chunk Content",
-                    value=selected_chunk['content'],
-                    height=300,
-                    key=f"editor_{selected_chunk['id']}" # Unique key forces re-render
-                )
-                col1, col2, _ = st.columns([1, 1, 4])
-                if col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
-                    manager.update_chunk_content(selected_chunk['id'], edited_content)
-                    st.session_state.status_message = "Chunk updated!"
-                    st.rerun()
-                if col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
-                    old_id = selected_chunk['id']
-                    manager.delete_chunk(old_id)
-                    st.session_state.status_message = "Chunk deleted!"
-                    # Select the next available chunk or none if empty
-                    remaining_chunks = manager.get_chunks()
-                    st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
-                    st.rerun()
-with tab2:
-    st.subheader("Document Overview")
-    st.markdown(manager.get_document_summary_stats(), unsafe_allow_html=True)
-    st.subheader("Content Targets")
-    with st.form("targets_form"):
-        c1, c2 = st.columns(2)
-        f_min = c1.number_input("Min Flesch Reading Ease", value=float(manager.target_flesch_min))
-        g_max = c2.number_input("Max Flesch-Kincaid Grade", value=float(manager.target_grade_max))
-        w_min = c1.number_input("Min Chunk Words", value=int(manager.target_min_chunk_words))
-        w_max = c2.number_input("Max Chunk Words", value=int(manager.target_max_chunk_words))
-        if st.form_submit_button("Set New Targets", use_container_width=True):
-            manager.set_targets(f_min, g_max, w_min, w_max)
-            st.session_state.status_message = "Targets updated."
-            st.rerun()
-    st.subheader("Final Document")
     st.text_area("Compiled Markdown", manager.get_final_markdown(), height=400, disabled=False, key="final_markdown")

+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+from html_to_markdown import convert_to_markdown
+import re
+from llama_index.core.node_parser import MarkdownNodeParser
+from llama_index.core.schema import Document, MetadataMode
+import textstat # For readability metrics
class WebpageContentProcessor:
    """
    Fetch a webpage, isolate its main content, convert it to Markdown and split
    that Markdown into structured chunks.

    Fetch/convert failures are *returned* as human-readable strings (they are
    shown to the user as-is) instead of being raised. Every such string starts
    with one of ``ERROR_PREFIXES``; use ``is_error_message`` to tell a failure
    apart from real page content.
    """

    # Prefixes of every failure string produced by fetch_and_convert_to_markdown.
    ERROR_PREFIXES = ("Error:", "Error fetching URL:")

    # Boilerplate stripped from the main content before conversion.
    # The ad selectors match whole class tokens / id prefixes on purpose: a bare
    # substring match on "ad" also hits "heading", "header", "breadcrumb",
    # "loaded", ... and would delete legitimate content.
    UNWANTED_SELECTORS = (
        'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
        'textarea', 'svg', 'figure', 'figcaption',
        '.social-share', '.comments', '.related-posts', '.pagination',
        '.breadcrumbs', '.cookie-consent', '[role="navigation"]',
        '[role="banner"]', '[role="contentinfo"]',
        '[class~="ad"]', '[class~="ads"]', '[class*="advert"]',
        '[class^="ad-"]', '[class*=" ad-"]', '[class^="ad_"]', '[class*=" ad_"]',
        '[class$="-ad"]', '[class*="-ad "]',
        '[id="ad"]', '[id="ads"]', '[id^="ad-"]', '[id^="ad_"]', '[id$="-ad"]',
        '[id*="advert"]',
    )

    def __init__(self):
        pass

    @classmethod
    def is_error_message(cls, text) -> bool:
        """Return True if *text* is a failure string produced by this class."""
        return isinstance(text, str) and text.startswith(cls.ERROR_PREFIXES)

    @staticmethod
    def _locate_main_content(soup):
        """
        Return the element most likely to hold the article body, or None.

        Semantic containers are preferred; otherwise the nearest block-level
        ancestor of the first <h1> (at most five levels up); otherwise <body>.
        """
        container = (
            soup.find('article')
            or soup.find('main')
            or soup.find('div', class_='main-content')
            or soup.find('div', {'role': 'main'})
        )
        if container is not None:
            return container

        first_h1 = soup.find('h1')
        if first_h1 is None:
            return soup.body

        candidate = first_h1.parent
        for _ in range(5):
            if candidate is None:
                break
            if candidate.name in ('article', 'main', 'section', 'div'):
                return candidate
            candidate = candidate.parent
        # No suitable ancestor within five levels: fall back to the direct parent.
        return first_h1.find_parent()

    @staticmethod
    def _derive_title(content: str) -> str:
        """
        Return a display title for a chunk.

        A leading Markdown heading is used verbatim (without the '#' markers);
        otherwise the first line is used, shortened to 70 characters with an
        ellipsis only when something was actually cut off.
        """
        heading = re.match(r"^(#+)\s*(.*)", content)
        if heading:
            return heading.group(2).strip()
        first_line = content.split('\n')[0]
        if len(first_line) > 70:
            return first_line[:70] + "..."
        return first_line

    def fetch_and_convert_to_markdown(self, url: str) -> str:
        """
        Fetch *url*, strip boilerplate and return the main content as Markdown.

        Returns:
            The Markdown text on success, or a failure string (see
            ``is_error_message``) when the request fails, times out, no content
            container can be identified, or conversion raises.
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Drop non-content tags from the whole document first.
            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                for element in soup.find_all(tag_name):
                    element.decompose()

            content_for_conversion = self._locate_main_content(soup)
            if content_for_conversion is None:
                return "Error: Could not identify main content for conversion."

            for selector in self.UNWANTED_SELECTORS:
                for element in content_for_conversion.select(selector):
                    element.decompose()

            markdown_output = convert_to_markdown(str(content_for_conversion))
            # Collapse runs of blank lines left behind by the removed elements.
            markdown_output = re.sub(r'\n{3,}', '\n\n', markdown_output)
            return markdown_output.strip()
        except requests.exceptions.Timeout:
            return "Error: Request timed out. The server took too long to respond."
        except requests.exceptions.RequestException as e:
            return f"Error fetching URL: {e}."
        except Exception as e:
            # Top-level boundary: report instead of crashing the UI. The string
            # starts with "Error" so callers' error checks recognise it.
            return f"Error: An unexpected error occurred: {e}"

    def parse_markdown_into_chunks(self, markdown_content: str) -> list:
        """
        Split Markdown into chunks using llama_index's MarkdownNodeParser.

        Returns:
            A list of ``{"id": int, "title": str, "content": str}`` dicts, with
            ids numbered from 0 in document order. Empty input or a failure
            string yields an empty list. Content that merely mentions the word
            "Error" is parsed normally.
        """
        if not markdown_content or self.is_error_message(markdown_content):
            return []
        doc = Document(text=markdown_content)
        parser = MarkdownNodeParser(include_metadata=True)
        nodes = parser.get_nodes_from_documents([doc])
        structured_chunks = []
        for i, node in enumerate(nodes):
            content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
            structured_chunks.append(
                {"id": i, "title": self._derive_title(content), "content": content}
            )
        return structured_chunks
+class ChunkManager:
+    def __init__(self):
+        self._chunks = []
+        self.target_flesch_min = 60
+        self.target_grade_max = 8
+        self.target_min_chunk_words = 50
+        self.target_max_chunk_words = 500
+    def set_chunks(self, chunks: list):
+        self._chunks = [self._add_stats_to_chunk(chunk) for chunk in chunks]
+    def get_chunks(self) -> list:
+        return self._chunks
+    def _add_stats_to_chunk(self, chunk: dict) -> dict:
+        chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
+        return chunk
+    def _calculate_chunk_stats(self, text: str) -> dict:
+        stats = {}
+        try:
+            stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
+            stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
+            stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
+        except Exception:
+            stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
+        return stats
+    def format_chunk_stats(self, stats: dict) -> str:
+        flesch_color = "green" if stats.get('flesch_reading_ease', 0) >= self.target_flesch_min else "red"
+        grade_color = "green" if stats.get('flesch_kincaid_grade', 0) <= self.target_grade_max else "red"
+        word_color = "green" if self.target_min_chunk_words <= stats.get('word_count', 0) <= self.target_max_chunk_words else "red"
+        return (
+            f"**Word Count:** <span style='color:{word_color}'>{stats.get('word_count', 0)}</span> | "
+            f"**Reading Ease:** <span style='color:{flesch_color}'>{stats.get('flesch_reading_ease', 0):.2f}</span> | "
+            f"**Grade Level:** <span style='color:{grade_color}'>{stats.get('flesch_kincaid_grade', 0):.2f}</span>"
+        )
+    def get_document_summary_stats(self) -> str:
+        if not self._chunks:
+            return "No document loaded."
+        total_words = sum(c['stats']['word_count'] for c in self._chunks)
+        avg_ease = sum(c['stats']['flesch_reading_ease'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
+        avg_grade = sum(c['stats']['flesch_kincaid_grade'] for c in self._chunks) / len(self._chunks) if self._chunks else 0
+        return (
+            f"**Total Chunks:** {len(self._chunks)} | "
+            f"**Total Words:** {total_words} | "
+            f"**Avg. Reading Ease:** {avg_ease:.2f} | "
+            f"**Avg. Grade Level:** {avg_grade:.2f}"
+        )
+    def get_chunk_by_id(self, chunk_id: int) -> dict | None:
+        return next((c for c in self._chunks if c["id"] == chunk_id), None)
+    def update_chunk_content(self, chunk_id: int, new_content: str):
+        chunk = self.get_chunk_by_id(chunk_id)
+        if chunk:
+            chunk["content"] = new_content
+            self._add_stats_to_chunk(chunk)
+    def delete_chunk(self, chunk_id: int):
+        self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
+        for i, chunk in enumerate(self._chunks):
+            chunk['id'] = i
+    def get_final_markdown(self) -> str:
+        if not self._chunks:
+            return "No content to display."
+        return "\n\n".join(f"# {c['title']}\n{c['content']}" for c in self._chunks)
+    def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
+        self.target_flesch_min = flesch_min
+        self.target_grade_max = grade_max
+        self.target_min_chunk_words = min_words
+        self.target_max_chunk_words = max_words
+        self.set_chunks(self.get_chunks()) # Recalculate stats with new targets
st.set_page_config(layout="wide", page_title="Webpage Content Editor")

# Initialize session state variables (only on the first run of a session).
if 'chunk_manager' not in st.session_state:
    st.session_state.chunk_manager = ChunkManager()
if 'content_processor' not in st.session_state:
    st.session_state.content_processor = WebpageContentProcessor()
if 'selected_chunk_id' not in st.session_state:
    st.session_state.selected_chunk_id = None
if 'status_message' not in st.session_state:
    st.session_state.status_message = ""

processor = st.session_state.content_processor
manager = st.session_state.chunk_manager

# Prefixes of the failure strings fetch_and_convert_to_markdown returns.
# Matching on the prefix (not on the substring "Error") keeps pages that merely
# mention the word "Error" from being treated as failed fetches.
FETCH_ERROR_PREFIXES = ("Error:", "Error fetching URL:", "An unexpected error occurred:")


def delete_chunk_and_reselect(chunk_id):
    """
    Button callback: delete a chunk and select the first remaining one.

    `selected_chunk_id` is also the selectbox's widget key. Streamlit forbids
    assigning to a widget key once that widget has been instantiated in the
    current run, so the reselection has to happen in a callback, which runs
    before the next script run creates the widget.
    """
    chunk_manager = st.session_state.chunk_manager
    chunk_manager.delete_chunk(chunk_id)
    remaining_chunks = chunk_manager.get_chunks()
    st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
    st.session_state.status_message = "Chunk deleted!"


st.title("✨ Webpage Content Editor")
st.caption("Created by [Emilija Gjorgjevska](https://www.linkedin.com/in/emilijagjorgjevska/) | Inspired by Andrea Volpini's work on content chunking.")
st.info(
    "**Note:** Some URLs may be blocked due to server policies (like bot detection). "
    "This is an early version, so expect a few bugs!",
    icon="ℹ️"
)

url_input = st.text_input("Enter a webpage URL to begin", key="url_input")
if st.button("Process URL", use_container_width=True):
    url = url_input.strip()
    if not url:
        st.warning("Please enter a URL first.")
    else:
        with st.spinner("Fetching and processing content..."):
            markdown = processor.fetch_and_convert_to_markdown(url)
            if markdown.startswith(FETCH_ERROR_PREFIXES):
                st.session_state.status_message = markdown
                manager.set_chunks([])
            else:
                chunks = processor.parse_markdown_into_chunks(markdown)
                manager.set_chunks(chunks)
                st.session_state.status_message = f"Successfully processed {len(chunks)} chunks." if chunks else "Could not extract content chunks."
            # Safe here: the selectbox keyed "selected_chunk_id" has not been
            # created yet in this run.
            loaded_chunks = manager.get_chunks()
            st.session_state.selected_chunk_id = loaded_chunks[0]['id'] if loaded_chunks else None
            st.rerun()

if st.session_state.status_message:
    st.toast(st.session_state.status_message)
    st.session_state.status_message = ""  # Clear message after showing

tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])

with tab1:
    chunks = manager.get_chunks()
    if not chunks:
        st.write("Process a URL to start editing chunks.")
    else:
        chunk_options = {c['id']: f"Chunk {c['id']}: {c['title']}" for c in chunks}
        # Ensure the stored selection is valid *before* the selectbox is created.
        if st.session_state.selected_chunk_id not in chunk_options:
            st.session_state.selected_chunk_id = chunks[0]['id']

        # The selectbox reads and writes st.session_state.selected_chunk_id
        # through its key, so no `index=` is passed: supplying a default while
        # the value is also set via session state makes Streamlit warn about
        # conflicting sources.
        st.selectbox(
            "Select a chunk to edit",
            options=list(chunk_options),
            format_func=lambda x: chunk_options.get(x, "Invalid Chunk"),
            key="selected_chunk_id",
        )

        selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
        if selected_chunk:
            st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
            edited_content = st.text_area(
                "Chunk Content",
                value=selected_chunk['content'],
                height=300,
                key=f"editor_{selected_chunk['id']}"  # Unique key forces re-render
            )
            col1, col2, _ = st.columns([1, 1, 4])
            if col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
                manager.update_chunk_content(selected_chunk['id'], edited_content)
                st.session_state.status_message = "Chunk updated!"
                st.rerun()
            # Deletion changes the selection, so it goes through a callback
            # (see delete_chunk_and_reselect); Streamlit reruns afterwards.
            col2.button(
                "Delete Chunk",
                use_container_width=True,
                key=f"delete_{selected_chunk['id']}",
                on_click=delete_chunk_and_reselect,
                args=(selected_chunk['id'],),
            )

with tab2:
    st.subheader("Document Overview")
    st.markdown(manager.get_document_summary_stats(), unsafe_allow_html=True)
    st.subheader("Content Targets")
    with st.form("targets_form"):
        c1, c2 = st.columns(2)
        f_min = c1.number_input("Min Flesch Reading Ease", value=float(manager.target_flesch_min))
        g_max = c2.number_input("Max Flesch-Kincaid Grade", value=float(manager.target_grade_max))
        w_min = c1.number_input("Min Chunk Words", value=int(manager.target_min_chunk_words))
        w_max = c2.number_input("Max Chunk Words", value=int(manager.target_max_chunk_words))
        if st.form_submit_button("Set New Targets", use_container_width=True):
            if w_min > w_max:
                # An inverted range would mark every chunk as off-target.
                st.error("Min Chunk Words cannot be greater than Max Chunk Words.")
            else:
                manager.set_targets(f_min, g_max, w_min, w_max)
                st.session_state.status_message = "Targets updated."
                st.rerun()
    st.subheader("Final Document")
    st.text_area("Compiled Markdown", manager.get_final_markdown(), height=400, disabled=False, key="final_markdown")