Em4e committed on
Commit
dd5c454
·
verified ·
1 Parent(s): 0ec0963

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +576 -572
app.py CHANGED
@@ -1,573 +1,577 @@
1
- import streamlit as st
2
- import requests
3
- from bs4 import BeautifulSoup
4
- from html_to_markdown import convert_to_markdown
5
- import re
6
- from llama_index.core.node_parser import MarkdownNodeParser
7
- from llama_index.core.schema import Document, MetadataMode
8
- import textstat # For readability metrics
9
-
10
class WebpageContentProcessor:
    """
    Fetches a webpage, isolates its main content, converts it to Markdown,
    and parses that Markdown into structured, editable chunks.
    Adheres to the Single Responsibility Principle (SRP) for content processing.
    """

    # Tag names / CSS selectors for elements that are navigation, page chrome,
    # media or other boilerplate — never prose content we want to keep.
    _UNWANTED_SELECTORS = [
        'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
        'textarea', 'svg', 'canvas', 'audio', 'video', 'picture', 'source', 'track',
        'map', 'area', 'embed', 'object', 'param', 'applet', 'bgsound', 'frame',
        'frameset', 'noframes', 'template', 'slot', 'portal', 'datalist', 'keygen',
        'output', 'progress', 'meter', 'details', 'summary', 'dialog', 'menu',
        'menuitem', 'command', 'hr', 'figure', 'figcaption', 'cite',
        '.social-share', '.comments', '.related-posts', '.pagination',
        '.breadcrumbs', '.pop-up', '.modal', '.overlay', '.cookie-consent',
        '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
        '[role="complementary"]', '[role="search"]', '[role="menubar"]', '[role="toolbar"]',
        '[class*="utility"]', '[class*="global-nav"]', '[class*="skip"]', '[class*="toast"]',
        '[class*="announcement"]', '[class*="fixed-bottom"]', '[class*="fixed-top"]',
        '[id*="promo"]', '[id*="ad"]', '[id*="banner"]', '[id*="popup"]', '[id*="modal"]',
        '[id*="overlay"]', '[id*="cookie"]', '[id*="skip"]', '[id*="navbar"]', '[id*="menu"]',
        '.hidden', '.visually-hidden',
        '.no-print', '.print-hide',
        '.wp-block-navigation', '.wp-block-group.is-style-stripes',
        '[class*="column"]', '[class*="grid"]'
    ]

    def fetch_and_convert_to_markdown(self, url: str) -> str:
        """
        Fetch HTML from *url*, isolate the main content, strip boilerplate,
        and convert the result to Markdown.

        Returns the Markdown string on success. On failure returns a
        human-readable message starting with "Error" or
        "An unexpected error occurred" (callers screen on those prefixes).
        """
        try:
            response = requests.get(url, timeout=10)  # timeout guards against hung servers
            response.raise_for_status()  # surface 4xx/5xx as exceptions

            soup = BeautifulSoup(response.text, 'html.parser')

            # Aggressive initial removal of tags that never carry visible content.
            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                for element in soup.find_all(tag_name):
                    element.decompose()

            content_for_conversion = self._locate_main_content(soup)
            if not content_for_conversion:
                return "Error: Could not identify main content for conversion."

            self._strip_boilerplate(content_for_conversion)

            markdown_output = convert_to_markdown(str(content_for_conversion))

            # Post-processing: collapse blank-line runs, drop empty list
            # markers and stray emphasis runs left over from conversion.
            markdown_output = re.sub(r'\n\s*\n\s*\n+', '\n\n', markdown_output)
            markdown_output = re.sub(r'^\s*[\*\-]\s*$', '', markdown_output, flags=re.MULTILINE)
            markdown_output = re.sub(r'\*{3,}', '', markdown_output)
            return markdown_output.strip()

        except requests.exceptions.Timeout:
            return "Error: Request timed out. The server took too long to respond."
        except requests.exceptions.RequestException as e:
            return f"Error fetching URL: {e}. Please check the URL or your internet connection."
        except Exception as e:
            return f"An unexpected error occurred during HTML conversion: {e}"

    def _locate_main_content(self, soup):
        """Return the tag most likely to hold the page's main content, or None."""
        # Prefer semantic containers and common main-content divs.
        content = soup.find('article') or soup.find('main') or \
            soup.find('div', class_='main-content') or \
            soup.find('div', {'role': 'main'})
        if content:
            return content

        first_h1 = soup.find('h1')
        if not first_h1:
            # Ultimate fallback: the whole body (may be None for broken HTML).
            return soup.body

        # Walk up to 5 ancestor levels from the H1 looking for a wrapper.
        candidate = first_h1.parent
        for _ in range(5):
            if candidate is None:
                break
            has_content_class = candidate.name in ['article', 'main', 'section', 'div'] and \
                any(cls in candidate.get('class', []) for cls in
                    ['content', 'post-body', 'article-content', 'entry-content', 'main-content'])
            # Original precedence preserved: any tag with role="main" qualifies
            # regardless of its tag name.
            if has_content_class or candidate.get('role') == 'main':
                return candidate
            candidate = candidate.parent

        # No clear wrapper: keep the H1 plus everything that follows it.
        # BUGFIX: capture the siblings *before* append() relocates first_h1 —
        # the old code read first_h1.next_sibling after the move, which is
        # always None, so all content after the heading was silently dropped.
        trailing_siblings = list(first_h1.next_siblings)
        temp_soup = BeautifulSoup('', 'html.parser')
        temp_soup.append(first_h1)
        for sibling in trailing_siblings:
            temp_soup.append(sibling)
        return temp_soup

    def _strip_boilerplate(self, content) -> None:
        """Remove boilerplate elements from *content* in place."""
        for selector in self._UNWANTED_SELECTORS:
            if re.match(r'^[a-zA-Z0-9]+$', selector):
                # Bare tag name: find_all is cheaper than a CSS select.
                for element in content.find_all(selector):
                    element.decompose()
            else:
                for element in content.select(selector):
                    element.decompose()

    def parse_markdown_into_chunks(self, markdown_content: str) -> list:
        """
        Parse Markdown content into LlamaIndex nodes (chunks) and extract a
        title and body for each.

        Returns a list of dicts with keys: id, title, content, original_node.
        """
        # BUGFIX: the old check only matched two of the four error strings
        # fetch_and_convert_to_markdown can return; timeout and
        # "could not identify main content" messages slipped through and were
        # chunked as if they were page content. All error returns start with
        # one of these prefixes.
        if not markdown_content or markdown_content.startswith(
                ("Error", "An unexpected error occurred")):
            return []

        doc = Document(text=markdown_content, metadata={"filename": "webpage_content"})
        parser = MarkdownNodeParser(include_metadata=True)
        nodes = parser.get_nodes_from_documents([doc])
        print(f"✅ Parsed {len(nodes)} nodes from Markdown.")  # Debug print

        structured_chunks = []
        for current_id, node in enumerate(nodes):
            pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()

            heading_title = ""
            content_text = pure_text_content

            heading_match = re.match(r"^(#+)\s*(.*)", pure_text_content)
            if heading_match:
                heading_title = heading_match.group(2).strip()
                content_text = pure_text_content[len(heading_match.group(0)):].strip()
                if not heading_title:
                    heading_title = "[Untitled Section]"
            else:
                # No heading: derive a title from the first line (truncated).
                first_line = content_text.split('\n')[0].strip()
                heading_title = (first_line[:70].strip() + "...") if len(first_line) > 70 else first_line
                if not heading_title:
                    heading_title = "[Empty Section]"
                elif not content_text:
                    heading_title = "[Empty Section]"

            structured_chunks.append({
                "id": current_id,
                "title": heading_title,
                "content": content_text,
                "original_node": node,  # keep a handle on the LlamaIndex node
            })

        return structured_chunks
170
-
171
- class ChunkManager:
172
- """
173
- Manages the collection of content chunks, their statistics, and target settings.
174
- Adheres to SRP for chunk data management and OCP by allowing new statistics
175
- or formatting without changing core chunk operations.
176
- """
177
- def __init__(self):
178
- self._chunks = []
179
- self.target_flesch_min = 60
180
- self.target_grade_max = 8
181
- self.target_min_chunk_words = 50
182
- self.target_max_chunk_words = 500
183
-
184
- def set_chunks(self, chunks: list):
185
- """Sets the internal list of chunks and calculates their initial statistics."""
186
- self._chunks = []
187
- for chunk in chunks:
188
- chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
189
- self._chunks.append(chunk)
190
-
191
- def get_chunks(self) -> list:
192
- """Returns the current list of processed chunks."""
193
- return self._chunks
194
-
195
- def _calculate_chunk_stats(self, text: str) -> dict:
196
- """
197
- Calculates various linguistic statistics for a given text chunk.
198
- (Private helper method, SRP for stats calculation)
199
- """
200
- stats = {}
201
- cleaned_text = re.sub(r'#+\s*', '', text)
202
- cleaned_text = re.sub(r'[\*\-]\s*', '', cleaned_text)
203
- cleaned_text = re.sub(r'\n\s*\n+', ' ', cleaned_text).strip()
204
-
205
- stats['word_count'] = textstat.lexicon_count(cleaned_text, removepunct=True)
206
- stats['char_count'] = len(cleaned_text)
207
- stats['sentence_count'] = textstat.sentence_count(cleaned_text)
208
-
209
- if stats['sentence_count'] > 0:
210
- stats['avg_sentence_length'] = stats['word_count'] / stats['sentence_count']
211
- else:
212
- stats['avg_sentence_length'] = 0
213
-
214
- stats['paragraph_count'] = cleaned_text.count('\n\n') + 1 if cleaned_text else 0
215
-
216
- try:
217
- stats['flesch_reading_ease'] = textstat.flesch_reading_ease(cleaned_text)
218
- except Exception:
219
- stats['flesch_reading_ease'] = 0
220
-
221
- try:
222
- stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(cleaned_text)
223
- except Exception:
224
- stats['flesch_kincaid_grade'] = 0
225
-
226
- try:
227
- stats['gunning_fog_score'] = textstat.gunning_fog(cleaned_text)
228
- except Exception:
229
- stats['gunning_fog_score'] = 0
230
-
231
- return stats
232
-
233
- def format_chunk_stats(self, stats: dict) -> str:
234
- """
235
- Formats chunk statistics into a readable string, including explanations for readability scores.
236
- Adheres to SRP for formatting.
237
- """
238
- flesch_ease_color = "red" if stats['flesch_reading_ease'] < self.target_flesch_min else "green"
239
- kincaid_grade_color = "red" if stats['flesch_kincaid_grade'] > self.target_grade_max else "green"
240
- word_count_color = "red" if not (self.target_min_chunk_words <= stats['word_count'] <= self.target_max_chunk_words) else "green"
241
-
242
- stats_str = "#### Chunk Statistics:\n"
243
- stats_str += f"- **Word Count:** <span style='color:{word_count_color}'>{stats['word_count']}</span> (Target: {self.target_min_chunk_words}-{self.target_max_chunk_words})\n"
244
- stats_str += f"- **Character Count:** {stats['char_count']}\n"
245
- stats_str += f"- **Sentence Count:** {stats['sentence_count']}\n"
246
- stats_str += f"- **Avg Sentence Length:** {stats['avg_sentence_length']:.2f} words\n"
247
- stats_str += f"- **Paragraph Count:** {stats['paragraph_count']}\n"
248
- stats_str += f"- **Flesch Reading Ease:** <span style='color:{flesch_ease_color}'>{stats['flesch_reading_ease']:.2f}</span> (Higher scores mean easier to read.)\n"
249
- stats_str += f"- **Flesch-Kincaid Grade:** <span style='color:{kincaid_grade_color}'>{stats['flesch_kincaid_grade']:.2f}</span> (Indicates the U.S. grade level needed to understand the text.)\n"
250
- stats_str += f"- **Gunning Fog Score:** {stats['gunning_fog_score']:.2f}\n"
251
- return stats_str
252
-
253
- def get_document_summary_stats(self) -> str:
254
- """
255
- Aggregates statistics for the entire document across all managed chunks.
256
- Adheres to SRP for document-level summary.
257
- """
258
- if not self._chunks:
259
- return "No document loaded to generate statistics."
260
-
261
- total_words = 0
262
- total_chars = 0
263
- total_sentences = 0
264
- total_paragraphs = 0
265
-
266
- all_content_text = ""
267
- for chunk in self._chunks:
268
- content_text_for_stats = chunk['content']
269
- # Re-calculate stats for each chunk content to ensure summary is up-to-date
270
- current_chunk_stats = self._calculate_chunk_stats(content_text_for_stats)
271
- total_words += current_chunk_stats['word_count']
272
- total_chars += current_chunk_stats['char_count']
273
- total_sentences += current_chunk_stats['sentence_count']
274
- total_paragraphs += current_chunk_stats['paragraph_count']
275
- all_content_text += content_text_for_stats + "\n\n"
276
-
277
- doc_stats_str = "## Overall Document Statistics:\n"
278
- doc_stats_str += f"- **Total Chunks:** {len(self._chunks)}\n"
279
- doc_stats_str += f"- **Total Words:** {total_words}\n"
280
- doc_stats_str += f"- **Total Characters:** {total_chars}\n"
281
- doc_stats_str += f"- **Total Sentences:** {total_sentences}\n"
282
- doc_stats_str += f"- **Total Paragraphs:** {total_paragraphs}\n"
283
-
284
- if len(self._chunks) > 0:
285
- doc_stats_str += f"- **Average Words per Chunk:** {total_words / len(self._chunks):.2f}\n"
286
-
287
- if all_content_text.strip():
288
- overall_stats = self._calculate_chunk_stats(all_content_text)
289
- doc_stats_str += f"- **Overall Flesch Reading Ease:** {overall_stats['flesch_reading_ease']:.2f}\n"
290
- doc_stats_str += f"- **Overall Flesch-Kincaid Grade Level:** {overall_stats['flesch_kincaid_grade']:.2f}\n"
291
- doc_stats_str += f"- **Overall Gunning Fog Score:** {overall_stats['gunning_fog_score']:.2f}\n"
292
- doc_stats_str += f"- **Overall Average Sentence Length:** {overall_stats['avg_sentence_length']:.2f} words\n"
293
- else:
294
- doc_stats_str += "- No content available for overall readability metrics.\n"
295
-
296
- return doc_stats_str
297
-
298
- def get_chunk_by_id(self, chunk_id: int) -> dict | None:
299
- """Retrieves a chunk by its ID."""
300
- return next((chunk for chunk in self._chunks if chunk["id"] == chunk_id), None)
301
-
302
- def get_chunk_titles_for_dropdown(self) -> list:
303
- """Generates dropdown choices using plain text (no HTML)."""
304
- dropdown_choices = []
305
- for chunk in self._chunks:
306
- title = chunk['title']
307
- dropdown_choices.append(f"{chunk['id']}: {title}")
308
- return dropdown_choices
309
-
310
- def update_chunk_content(self, chunk_id: int, new_content: str) -> bool:
311
- """
312
- Updates the content of a chunk, recalculates its stats, and updates its title if needed.
313
- Returns True if successful, False otherwise.
314
- """
315
- for chunk in self._chunks:
316
- if chunk["id"] == chunk_id:
317
- chunk["content"] = new_content
318
- chunk["stats"] = self._calculate_chunk_stats(new_content)
319
- # Update chunk title if it was a placeholder or empty
320
- if chunk["title"].startswith("[") and chunk["title"].endswith("]") or not chunk["title"]:
321
- first_line = new_content.split('\n')[0].strip()
322
- chunk["title"] = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line
323
- if not chunk["title"]:
324
- chunk["title"] = "[Empty Section]"
325
- elif not new_content:
326
- chunk["title"] = "[Empty Section]"
327
- return True
328
- return False
329
-
330
- def delete_chunk(self, chunk_id: int) -> bool:
331
- """
332
- Deletes a chunk by ID and re-indexes remaining chunks.
333
- Returns True if successful, False otherwise.
334
- """
335
- initial_chunk_count = len(self._chunks)
336
- self._chunks = [chunk for chunk in self._chunks if chunk["id"] != chunk_id]
337
- if len(self._chunks) == initial_chunk_count:
338
- return False # Chunk not found
339
-
340
- # Re-index IDs to be sequential again
341
- for i, chunk in enumerate(self._chunks):
342
- chunk['id'] = i
343
-
344
- return True
345
-
346
- def get_final_markdown(self) -> str:
347
- """Compiles all current chunks into a single Markdown string."""
348
- final_md = ""
349
- if not self._chunks:
350
- return "No content to compile. Please process a URL first."
351
-
352
- for chunk in self._chunks:
353
- # Use H1 heading if title is meaningful
354
- if not chunk["title"].startswith("[") and chunk["title"]:
355
- final_md += f"# {chunk['title']}\n\n"
356
- final_md += f"{chunk['content']}\n\n"
357
-
358
- return final_md.strip()
359
-
360
- def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
361
- """Sets the global readability and word count targets."""
362
- self.target_flesch_min = flesch_min
363
- self.target_grade_max = grade_max
364
- self.target_min_chunk_words = min_words
365
- self.target_max_chunk_words = max_words
366
- # Recalculate stats for all chunks to reflect new targets in color coding (if displayed)
367
- for chunk in self._chunks:
368
- chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
369
-
370
# --- Streamlit UI Definition ---
st.set_page_config(layout="wide", page_title="Chunk-Powered Webpage Editor")

# Seed session state on first run; subsequent reruns keep the user's data.
if 'chunk_manager' not in st.session_state:
    st.session_state.chunk_manager = ChunkManager()
if 'content_processor' not in st.session_state:
    st.session_state.content_processor = WebpageContentProcessor()
if 'status_message' not in st.session_state:
    st.session_state.status_message = ""
if 'chunk_selector' not in st.session_state:
    st.session_state.chunk_selector = None
if 'chunk_content_editor' not in st.session_state:
    st.session_state.chunk_content_editor = ""
if 'final_markdown' not in st.session_state:
    st.session_state.final_markdown = "Click 'Compile All Chunks' to see the final document with your edits."

# Short-hand handles to the session-scoped managers.
content_processor = st.session_state.content_processor
chunk_manager = st.session_state.chunk_manager

st.markdown("# <center>✨ Chunk-Powered Webpage Editor ✨</center>", unsafe_allow_html=True)
st.markdown(
    "Enter a URL, fetch its content, and break it into editable 'chunks'. "
    "Review statistics, set targets, edit chunks, and compile your final Markdown."
)

# --- URL Input and Processing ---
url_col, action_col = st.columns([4, 1])
with url_col:
    url_input = st.text_input(
        label="Enter Webpage URL",
        placeholder="e.g., https://www.llamaindex.ai/blog/what-is-llamaindex",
        key="url_input"
    )
with action_col:
    st.write("")  # vertical spacer
    st.write("")  # vertical spacer
    process_button = st.button("Process URL", use_container_width=True)

if st.session_state.status_message:
    st.info(st.session_state.status_message)

if process_button:
    if not url_input:
        st.session_state.status_message = "Please enter a URL to process."
    else:
        with st.spinner("Processing URL..."):
            markdown_content = content_processor.fetch_and_convert_to_markdown(url_input)

            if "Error" in markdown_content:
                chunk_manager.set_chunks([])
                st.session_state.status_message = markdown_content
            else:
                chunks = content_processor.parse_markdown_into_chunks(markdown_content)
                chunk_manager.set_chunks(chunks)
                st.session_state.status_message = "URL processed successfully!" if chunks else "URL processed, but no content chunks could be extracted."
                # Point the selector at the first chunk (or clear it).
                st.session_state.chunk_selector = (
                    chunk_manager.get_chunk_titles_for_dropdown()[0] if chunks else None
                )

# --- Tabs for Editor and Overview ---
tab1, tab2 = st.tabs(["Editor", "Document Overview & Targets"])

with tab1:
    st.markdown("## Edit Chunks Individually")

    select_col, nav_col = st.columns([2, 1])

    with select_col:
        chunk_selector_options = chunk_manager.get_chunk_titles_for_dropdown()
        if chunk_selector_options:
            # Re-locate the current selection so edits/deletes keep focus.
            try:
                current_selection_index = chunk_selector_options.index(st.session_state.chunk_selector)
            except (ValueError, TypeError):
                current_selection_index = 0

            selected_chunk_title = st.selectbox(
                label="Select Chunk to Edit",
                options=chunk_selector_options,
                index=current_selection_index,
                key="chunk_selector"
            )
        else:
            selected_chunk_title = st.selectbox(
                label="Select Chunk to Edit",
                options=["No chunks available"],
                disabled=True
            )

    with nav_col:
        prev_col, next_col = st.columns(2)
        with prev_col:
            if st.button("⬅️ Previous Chunk", use_container_width=True):
                if selected_chunk_title and "No chunks available" not in selected_chunk_title:
                    current_id = int(selected_chunk_title.split(':')[0].strip())
                    neighbour = chunk_manager.get_chunk_by_id(max(0, current_id - 1))
                    if neighbour:
                        st.session_state.chunk_selector = f"{neighbour['id']}: {neighbour['title']}"
        with next_col:
            if st.button("Next Chunk ➡️", use_container_width=True):
                if selected_chunk_title and "No chunks available" not in selected_chunk_title:
                    current_id = int(selected_chunk_title.split(':')[0].strip())
                    last_id = len(chunk_manager.get_chunks()) - 1
                    neighbour = chunk_manager.get_chunk_by_id(min(last_id, current_id + 1))
                    if neighbour:
                        st.session_state.chunk_selector = f"{neighbour['id']}: {neighbour['title']}"

    # Resolve the chunk the dropdown currently points at.
    selected_chunk = None
    if selected_chunk_title and "No chunks available" not in selected_chunk_title:
        current_id = int(selected_chunk_title.split(':')[0].strip())
        selected_chunk = chunk_manager.get_chunk_by_id(current_id)

    if selected_chunk:
        st.text_input(
            label="Chunk Title (Auto-detected)",
            value=selected_chunk["title"],
            disabled=True
        )

        chunk_content_editor = st.text_area(
            label="Chunk Content",
            value=selected_chunk["content"],
            height=250,
            key=f"editor_{selected_chunk['id']}"  # per-chunk key prevents state bleed
        )

        st.markdown(
            chunk_manager.format_chunk_stats(selected_chunk['stats']),
            unsafe_allow_html=True
        )

        update_col, delete_col, _ = st.columns([1, 1, 3])
        with update_col:
            if st.button("Update Selected Chunk", use_container_width=True):
                chunk_manager.update_chunk_content(selected_chunk['id'], chunk_content_editor)
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' updated successfully!"
                # Refresh the dropdown label in case the title changed.
                st.session_state.chunk_selector = f"{selected_chunk['id']}: {chunk_manager.get_chunk_by_id(selected_chunk['id'])['title']}"

        with delete_col:
            if st.button("Delete Selected Chunk", use_container_width=True):
                chunk_manager.delete_chunk(selected_chunk['id'])
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' deleted successfully!"
                remaining = chunk_manager.get_chunk_titles_for_dropdown()
                st.session_state.chunk_selector = remaining[0] if remaining else None
    else:
        # Disabled placeholders while no chunk is selected.
        st.text_input("Chunk Title (Auto-detected)", "Title of the selected chunk", disabled=True)
        st.text_area("Chunk Content", "Content of the selected chunk will appear here for editing.", height=250, disabled=True)
        st.markdown("Chunk statistics will appear here.")

    st.markdown("---")
    st.markdown("## Final Compiled Markdown")

    if st.button("Compile All Chunks", use_container_width=True):
        st.session_state.final_markdown = chunk_manager.get_final_markdown()

    st.text_area(
        label="Compiled Markdown",
        value=st.session_state.final_markdown,
        height=400,
        key="final_markdown_output",
        disabled=False
    )

with tab2:
    st.markdown("## Document Summary Statistics")
    st.markdown(chunk_manager.get_document_summary_stats(), unsafe_allow_html=True)

    st.markdown("---")
    st.markdown("## Content Targets")
    st.markdown("Adjust these targets to guide your writing and see visual feedback in the chunk selector (green=good, red=needs attention).")

    with st.form("targets_form"):
        left_col, right_col = st.columns(2)
        with left_col:
            target_flesch_min_input = st.number_input("Min Flesch Reading Ease", value=float(chunk_manager.target_flesch_min))
            target_min_chunk_words_input = st.number_input("Min Chunk Words", value=chunk_manager.target_min_chunk_words)
        with right_col:
            target_grade_max_input = st.number_input("Max Flesch-Kincaid Grade", value=float(chunk_manager.target_grade_max))
            target_max_chunk_words_input = st.number_input("Max Chunk Words", value=chunk_manager.target_max_chunk_words)

        submitted = st.form_submit_button("Set New Targets", use_container_width=True)
        if submitted:
            chunk_manager.set_targets(
                target_flesch_min_input,
                target_grade_max_input,
                int(target_min_chunk_words_input),
                int(target_max_chunk_words_input)
            )
            st.session_state.status_message = "Target settings updated."
            st.rerun()
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from html_to_markdown import convert_to_markdown
5
+ import re
6
+ from llama_index.core.node_parser import MarkdownNodeParser
7
+ from llama_index.core.schema import Document, MetadataMode
8
+ import textstat # For readability metrics
9
+
10
class WebpageContentProcessor:
    """
    Fetches a webpage, isolates its main content, converts it to Markdown,
    and parses that Markdown into structured, editable chunks.
    Adheres to the Single Responsibility Principle (SRP) for content processing.
    """

    # Tag names / CSS selectors for elements that are navigation, page chrome,
    # media or other boilerplate — never prose content we want to keep.
    _UNWANTED_SELECTORS = [
        'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
        'textarea', 'svg', 'canvas', 'audio', 'video', 'picture', 'source', 'track',
        'map', 'area', 'embed', 'object', 'param', 'applet', 'bgsound', 'frame',
        'frameset', 'noframes', 'template', 'slot', 'portal', 'datalist', 'keygen',
        'output', 'progress', 'meter', 'details', 'summary', 'dialog', 'menu',
        'menuitem', 'command', 'hr', 'figure', 'figcaption', 'cite',
        '.social-share', '.comments', '.related-posts', '.pagination',
        '.breadcrumbs', '.pop-up', '.modal', '.overlay', '.cookie-consent',
        '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
        '[role="complementary"]', '[role="search"]', '[role="menubar"]', '[role="toolbar"]',
        '[class*="utility"]', '[class*="global-nav"]', '[class*="skip"]', '[class*="toast"]',
        '[class*="announcement"]', '[class*="fixed-bottom"]', '[class*="fixed-top"]',
        '[id*="promo"]', '[id*="ad"]', '[id*="banner"]', '[id*="popup"]', '[id*="modal"]',
        '[id*="overlay"]', '[id*="cookie"]', '[id*="skip"]', '[id*="navbar"]', '[id*="menu"]',
        '.hidden', '.visually-hidden',
        '.no-print', '.print-hide',
        '.wp-block-navigation', '.wp-block-group.is-style-stripes',
        '[class*="column"]', '[class*="grid"]'
    ]

    def fetch_and_convert_to_markdown(self, url: str) -> str:
        """
        Fetch HTML from *url*, isolate the main content, strip boilerplate,
        and convert the result to Markdown.

        Returns the Markdown string on success. On failure returns a
        human-readable message starting with "Error" or
        "An unexpected error occurred" (callers screen on those prefixes).
        """
        try:
            response = requests.get(url, timeout=10)  # timeout guards against hung servers
            response.raise_for_status()  # surface 4xx/5xx as exceptions

            soup = BeautifulSoup(response.text, 'html.parser')

            # Aggressive initial removal of tags that never carry visible content.
            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                for element in soup.find_all(tag_name):
                    element.decompose()

            content_for_conversion = self._locate_main_content(soup)
            if not content_for_conversion:
                return "Error: Could not identify main content for conversion."

            self._strip_boilerplate(content_for_conversion)

            markdown_output = convert_to_markdown(str(content_for_conversion))

            # Post-processing: collapse blank-line runs, drop empty list
            # markers and stray emphasis runs left over from conversion.
            markdown_output = re.sub(r'\n\s*\n\s*\n+', '\n\n', markdown_output)
            markdown_output = re.sub(r'^\s*[\*\-]\s*$', '', markdown_output, flags=re.MULTILINE)
            markdown_output = re.sub(r'\*{3,}', '', markdown_output)
            return markdown_output.strip()

        except requests.exceptions.Timeout:
            return "Error: Request timed out. The server took too long to respond."
        except requests.exceptions.RequestException as e:
            return f"Error fetching URL: {e}. Please check the URL or your internet connection."
        except Exception as e:
            return f"An unexpected error occurred during HTML conversion: {e}"

    def _locate_main_content(self, soup):
        """Return the tag most likely to hold the page's main content, or None."""
        # Prefer semantic containers and common main-content divs.
        content = soup.find('article') or soup.find('main') or \
            soup.find('div', class_='main-content') or \
            soup.find('div', {'role': 'main'})
        if content:
            return content

        first_h1 = soup.find('h1')
        if not first_h1:
            # Ultimate fallback: the whole body (may be None for broken HTML).
            return soup.body

        # Walk up to 5 ancestor levels from the H1 looking for a wrapper.
        candidate = first_h1.parent
        for _ in range(5):
            if candidate is None:
                break
            has_content_class = candidate.name in ['article', 'main', 'section', 'div'] and \
                any(cls in candidate.get('class', []) for cls in
                    ['content', 'post-body', 'article-content', 'entry-content', 'main-content'])
            # Original precedence preserved: any tag with role="main" qualifies
            # regardless of its tag name.
            if has_content_class or candidate.get('role') == 'main':
                return candidate
            candidate = candidate.parent

        # No clear wrapper: keep the H1 plus everything that follows it.
        # BUGFIX: capture the siblings *before* append() relocates first_h1 —
        # the old code read first_h1.next_sibling after the move, which is
        # always None, so all content after the heading was silently dropped.
        trailing_siblings = list(first_h1.next_siblings)
        temp_soup = BeautifulSoup('', 'html.parser')
        temp_soup.append(first_h1)
        for sibling in trailing_siblings:
            temp_soup.append(sibling)
        return temp_soup

    def _strip_boilerplate(self, content) -> None:
        """Remove boilerplate elements from *content* in place."""
        for selector in self._UNWANTED_SELECTORS:
            if re.match(r'^[a-zA-Z0-9]+$', selector):
                # Bare tag name: find_all is cheaper than a CSS select.
                for element in content.find_all(selector):
                    element.decompose()
            else:
                for element in content.select(selector):
                    element.decompose()

    def parse_markdown_into_chunks(self, markdown_content: str) -> list:
        """
        Parse Markdown content into LlamaIndex nodes (chunks) and extract a
        title and body for each.

        Returns a list of dicts with keys: id, title, content, original_node.
        """
        # BUGFIX: the old check only matched two of the four error strings
        # fetch_and_convert_to_markdown can return; timeout and
        # "could not identify main content" messages slipped through and were
        # chunked as if they were page content. All error returns start with
        # one of these prefixes.
        if not markdown_content or markdown_content.startswith(
                ("Error", "An unexpected error occurred")):
            return []

        doc = Document(text=markdown_content, metadata={"filename": "webpage_content"})
        parser = MarkdownNodeParser(include_metadata=True)
        nodes = parser.get_nodes_from_documents([doc])
        print(f"✅ Parsed {len(nodes)} nodes from Markdown.")  # Debug print

        structured_chunks = []
        for current_id, node in enumerate(nodes):
            pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()

            heading_title = ""
            content_text = pure_text_content

            heading_match = re.match(r"^(#+)\s*(.*)", pure_text_content)
            if heading_match:
                heading_title = heading_match.group(2).strip()
                content_text = pure_text_content[len(heading_match.group(0)):].strip()
                if not heading_title:
                    heading_title = "[Untitled Section]"
            else:
                # No heading: derive a title from the first line (truncated).
                first_line = content_text.split('\n')[0].strip()
                heading_title = (first_line[:70].strip() + "...") if len(first_line) > 70 else first_line
                if not heading_title:
                    heading_title = "[Empty Section]"
                elif not content_text:
                    heading_title = "[Empty Section]"

            structured_chunks.append({
                "id": current_id,
                "title": heading_title,
                "content": content_text,
                "original_node": node,  # keep a handle on the LlamaIndex node
            })

        return structured_chunks
170
+
171
+ class ChunkManager:
172
+ """
173
+ Manages the collection of content chunks, their statistics, and target settings.
174
+ Adheres to SRP for chunk data management and OCP by allowing new statistics
175
+ or formatting without changing core chunk operations.
176
+ """
177
+ def __init__(self):
178
+ self._chunks = []
179
+ self.target_flesch_min = 60
180
+ self.target_grade_max = 8
181
+ self.target_min_chunk_words = 50
182
+ self.target_max_chunk_words = 500
183
+
184
+ def set_chunks(self, chunks: list):
185
+ """Sets the internal list of chunks and calculates their initial statistics."""
186
+ self._chunks = []
187
+ for chunk in chunks:
188
+ chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
189
+ self._chunks.append(chunk)
190
+
191
+ def get_chunks(self) -> list:
192
+ """Returns the current list of processed chunks."""
193
+ return self._chunks
194
+
195
+ def _calculate_chunk_stats(self, text: str) -> dict:
196
+ """
197
+ Calculates various linguistic statistics for a given text chunk.
198
+ (Private helper method, SRP for stats calculation)
199
+ """
200
+ stats = {}
201
+ cleaned_text = re.sub(r'#+\s*', '', text)
202
+ cleaned_text = re.sub(r'[\*\-]\s*', '', cleaned_text)
203
+ cleaned_text = re.sub(r'\n\s*\n+', ' ', cleaned_text).strip()
204
+
205
+ stats['word_count'] = textstat.lexicon_count(cleaned_text, removepunct=True)
206
+ stats['char_count'] = len(cleaned_text)
207
+ stats['sentence_count'] = textstat.sentence_count(cleaned_text)
208
+
209
+ if stats['sentence_count'] > 0:
210
+ stats['avg_sentence_length'] = stats['word_count'] / stats['sentence_count']
211
+ else:
212
+ stats['avg_sentence_length'] = 0
213
+
214
+ stats['paragraph_count'] = cleaned_text.count('\n\n') + 1 if cleaned_text else 0
215
+
216
+ try:
217
+ stats['flesch_reading_ease'] = textstat.flesch_reading_ease(cleaned_text)
218
+ except Exception:
219
+ stats['flesch_reading_ease'] = 0
220
+
221
+ try:
222
+ stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(cleaned_text)
223
+ except Exception:
224
+ stats['flesch_kincaid_grade'] = 0
225
+
226
+ try:
227
+ stats['gunning_fog_score'] = textstat.gunning_fog(cleaned_text)
228
+ except Exception:
229
+ stats['gunning_fog_score'] = 0
230
+
231
+ return stats
232
+
233
+ def format_chunk_stats(self, stats: dict) -> str:
234
+ """
235
+ Formats chunk statistics into a readable string, including explanations for readability scores.
236
+ Adheres to SRP for formatting.
237
+ """
238
+ flesch_ease_color = "red" if stats['flesch_reading_ease'] < self.target_flesch_min else "green"
239
+ kincaid_grade_color = "red" if stats['flesch_kincaid_grade'] > self.target_grade_max else "green"
240
+ word_count_color = "red" if not (self.target_min_chunk_words <= stats['word_count'] <= self.target_max_chunk_words) else "green"
241
+
242
+ stats_str = "#### Chunk Statistics:\n"
243
+ stats_str += f"- **Word Count:** <span style='color:{word_count_color}'>{stats['word_count']}</span> (Target: {self.target_min_chunk_words}-{self.target_max_chunk_words})\n"
244
+ stats_str += f"- **Character Count:** {stats['char_count']}\n"
245
+ stats_str += f"- **Sentence Count:** {stats['sentence_count']}\n"
246
+ stats_str += f"- **Avg Sentence Length:** {stats['avg_sentence_length']:.2f} words\n"
247
+ stats_str += f"- **Paragraph Count:** {stats['paragraph_count']}\n"
248
+ stats_str += f"- **Flesch Reading Ease:** <span style='color:{flesch_ease_color}'>{stats['flesch_reading_ease']:.2f}</span> (Higher scores mean easier to read.)\n"
249
+ stats_str += f"- **Flesch-Kincaid Grade:** <span style='color:{kincaid_grade_color}'>{stats['flesch_kincaid_grade']:.2f}</span> (Indicates the U.S. grade level needed to understand the text.)\n"
250
+ stats_str += f"- **Gunning Fog Score:** {stats['gunning_fog_score']:.2f}\n"
251
+ return stats_str
252
+
253
+ def get_document_summary_stats(self) -> str:
254
+ """
255
+ Aggregates statistics for the entire document across all managed chunks.
256
+ Adheres to SRP for document-level summary.
257
+ """
258
+ if not self._chunks:
259
+ return "No document loaded to generate statistics."
260
+
261
+ total_words = 0
262
+ total_chars = 0
263
+ total_sentences = 0
264
+ total_paragraphs = 0
265
+
266
+ all_content_text = ""
267
+ for chunk in self._chunks:
268
+ content_text_for_stats = chunk['content']
269
+ # Re-calculate stats for each chunk content to ensure summary is up-to-date
270
+ current_chunk_stats = self._calculate_chunk_stats(content_text_for_stats)
271
+ total_words += current_chunk_stats['word_count']
272
+ total_chars += current_chunk_stats['char_count']
273
+ total_sentences += current_chunk_stats['sentence_count']
274
+ total_paragraphs += current_chunk_stats['paragraph_count']
275
+ all_content_text += content_text_for_stats + "\n\n"
276
+
277
+ doc_stats_str = "## Overall Document Statistics:\n"
278
+ doc_stats_str += f"- **Total Chunks:** {len(self._chunks)}\n"
279
+ doc_stats_str += f"- **Total Words:** {total_words}\n"
280
+ doc_stats_str += f"- **Total Characters:** {total_chars}\n"
281
+ doc_stats_str += f"- **Total Sentences:** {total_sentences}\n"
282
+ doc_stats_str += f"- **Total Paragraphs:** {total_paragraphs}\n"
283
+
284
+ if len(self._chunks) > 0:
285
+ doc_stats_str += f"- **Average Words per Chunk:** {total_words / len(self._chunks):.2f}\n"
286
+
287
+ if all_content_text.strip():
288
+ overall_stats = self._calculate_chunk_stats(all_content_text)
289
+ doc_stats_str += f"- **Overall Flesch Reading Ease:** {overall_stats['flesch_reading_ease']:.2f}\n"
290
+ doc_stats_str += f"- **Overall Flesch-Kincaid Grade Level:** {overall_stats['flesch_kincaid_grade']:.2f}\n"
291
+ doc_stats_str += f"- **Overall Gunning Fog Score:** {overall_stats['gunning_fog_score']:.2f}\n"
292
+ doc_stats_str += f"- **Overall Average Sentence Length:** {overall_stats['avg_sentence_length']:.2f} words\n"
293
+ else:
294
+ doc_stats_str += "- No content available for overall readability metrics.\n"
295
+
296
+ return doc_stats_str
297
+
298
+ def get_chunk_by_id(self, chunk_id: int) -> dict | None:
299
+ """Retrieves a chunk by its ID."""
300
+ return next((chunk for chunk in self._chunks if chunk["id"] == chunk_id), None)
301
+
302
+ def get_chunk_titles_for_dropdown(self) -> list:
303
+ """Generates dropdown choices using plain text (no HTML)."""
304
+ dropdown_choices = []
305
+ for chunk in self._chunks:
306
+ title = chunk['title']
307
+ dropdown_choices.append(f"{chunk['id']}: {title}")
308
+ return dropdown_choices
309
+
310
+ def update_chunk_content(self, chunk_id: int, new_content: str) -> bool:
311
+ """
312
+ Updates the content of a chunk, recalculates its stats, and updates its title if needed.
313
+ Returns True if successful, False otherwise.
314
+ """
315
+ for chunk in self._chunks:
316
+ if chunk["id"] == chunk_id:
317
+ chunk["content"] = new_content
318
+ chunk["stats"] = self._calculate_chunk_stats(new_content)
319
+ # Update chunk title if it was a placeholder or empty
320
+ if chunk["title"].startswith("[") and chunk["title"].endswith("]") or not chunk["title"]:
321
+ first_line = new_content.split('\n')[0].strip()
322
+ chunk["title"] = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line
323
+ if not chunk["title"]:
324
+ chunk["title"] = "[Empty Section]"
325
+ elif not new_content:
326
+ chunk["title"] = "[Empty Section]"
327
+ return True
328
+ return False
329
+
330
+ def delete_chunk(self, chunk_id: int) -> bool:
331
+ """
332
+ Deletes a chunk by ID and re-indexes remaining chunks.
333
+ Returns True if successful, False otherwise.
334
+ """
335
+ initial_chunk_count = len(self._chunks)
336
+ self._chunks = [chunk for chunk in self._chunks if chunk["id"] != chunk_id]
337
+ if len(self._chunks) == initial_chunk_count:
338
+ return False # Chunk not found
339
+
340
+ # Re-index IDs to be sequential again
341
+ for i, chunk in enumerate(self._chunks):
342
+ chunk['id'] = i
343
+
344
+ return True
345
+
346
+ def get_final_markdown(self) -> str:
347
+ """Compiles all current chunks into a single Markdown string."""
348
+ final_md = ""
349
+ if not self._chunks:
350
+ return "No content to compile. Please process a URL first."
351
+
352
+ for chunk in self._chunks:
353
+ # Use H1 heading if title is meaningful
354
+ if not chunk["title"].startswith("[") and chunk["title"]:
355
+ final_md += f"# {chunk['title']}\n\n"
356
+ final_md += f"{chunk['content']}\n\n"
357
+
358
+ return final_md.strip()
359
+
360
+ def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
361
+ """Sets the global readability and word count targets."""
362
+ self.target_flesch_min = flesch_min
363
+ self.target_grade_max = grade_max
364
+ self.target_min_chunk_words = min_words
365
+ self.target_max_chunk_words = max_words
366
+ # Recalculate stats for all chunks to reflect new targets in color coding (if displayed)
367
+ for chunk in self._chunks:
368
+ chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
369
+
370
# --- Streamlit UI Definition ---
st.set_page_config(layout="wide", page_title="Chunk-Powered Webpage Editor")

# Initialize session state.
# Streamlit reruns this script top-to-bottom on every interaction, so anything
# that must survive a rerun (the two manager singletons, the status banner, the
# current dropdown selection and the compiled output) lives in session_state
# and is only created on the first run.
if 'chunk_manager' not in st.session_state:
    st.session_state.chunk_manager = ChunkManager()
if 'content_processor' not in st.session_state:
    st.session_state.content_processor = WebpageContentProcessor()
if 'status_message' not in st.session_state:
    st.session_state.status_message = ""
if 'chunk_selector' not in st.session_state:
    # Mirrors the "Select Chunk to Edit" selectbox (key="chunk_selector").
    st.session_state.chunk_selector = None
if 'chunk_content_editor' not in st.session_state:
    # NOTE(review): this key appears unused below — the content editor uses
    # per-chunk keys (f"editor_{id}"); confirm before removing.
    st.session_state.chunk_content_editor = ""
if 'final_markdown' not in st.session_state:
    st.session_state.final_markdown = "Click 'Compile All Chunks' to see the final document with your edits."


# Instantiate the managers
# (Local aliases for the session-scoped singletons used throughout the UI.)
content_processor = st.session_state.content_processor
chunk_manager = st.session_state.chunk_manager
391
+
392
# Page header and attribution banner (raw HTML, so unsafe_allow_html is on).
st.markdown("# <center>✨ Chunk-Powered Webpage Editor ✨</center>", unsafe_allow_html=True)
st.markdown(
    "Enter a URL, fetch its content, and break it into editable 'chunks'. "
    "Review statistics, set targets, edit chunks, and compile your final Markdown."
)
st.markdown("""<br><div style="display: flex; justify-content: flex-start; align-items: center; gap: 16px;">
<span>Runs best on Desktop. App created by <a href="https://www.linkedin.com/in/emilijagjorgjevska/" target="_blank">Emilija Gjorgjevska</a></span>
<a href="https://buymeacoffee.com/emiliagjorgjevska" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png"
alt="Buy Me A Coffee" style="height: 30px;"></a></div><br>""", unsafe_allow_html=True)

# --- URL Input and Processing ---
col1, col2 = st.columns([4, 1])
with col1:
    url_input = st.text_input(
        label="Enter Webpage URL",
        placeholder="e.g., https://www.llamaindex.ai/blog/what-is-llamaindex",
        key="url_input"
    )
with col2:
    st.write("")  # Spacer
    st.write("")  # Spacer
    process_button = st.button("Process URL", use_container_width=True)

# Show the most recent status (set by processing, update, delete, targets...).
if st.session_state.status_message:
    st.info(st.session_state.status_message)

if process_button:
    if not url_input:
        st.session_state.status_message = "Please enter a URL to process."
    else:
        with st.spinner("Processing URL..."):
            markdown_content = content_processor.fetch_and_convert_to_markdown(url_input)

            # fetch_and_convert_to_markdown signals failure by returning a
            # string containing "Error"; in that case clear any stale chunks
            # and surface the message as the status banner.
            if "Error" in markdown_content:
                chunk_manager.set_chunks([])
                st.session_state.status_message = markdown_content
            else:
                chunks = content_processor.parse_markdown_into_chunks(markdown_content)
                chunk_manager.set_chunks(chunks)
                st.session_state.status_message = "URL processed successfully!" if chunks else "URL processed, but no content chunks could be extracted."

                # Pre-select the first chunk so the editor opens on something.
                if chunks:
                    st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                else:
                    st.session_state.chunk_selector = None
439
# --- Tabs for Editor and Overview ---
tab1, tab2 = st.tabs(["Editor", "Document Overview & Targets"])

with tab1:
    st.markdown("## Edit Chunks Individually")

    col1, col2 = st.columns([2, 1])

    with col1:
        # Dropdown options are "id: title" strings built from current chunks.
        chunk_selector_options = chunk_manager.get_chunk_titles_for_dropdown()
        if chunk_selector_options:
            try:
                # Find the index of the currently selected item to handle updates
                # (falls back to the first chunk when the stored selection is
                # None or no longer present after an edit/delete).
                current_selection_index = chunk_selector_options.index(st.session_state.chunk_selector)
            except (ValueError, TypeError):
                current_selection_index = 0

            selected_chunk_title = st.selectbox(
                label="Select Chunk to Edit",
                options=chunk_selector_options,
                index=current_selection_index,
                key="chunk_selector"
            )
        else:
            # Disabled placeholder when nothing has been processed yet.
            selected_chunk_title = st.selectbox(
                label="Select Chunk to Edit",
                options=["No chunks available"],
                disabled=True
            )

    with col2:
        # Previous/Next navigation: both buttons parse the numeric id out of
        # the "id: title" selection, clamp it to valid range, and write the
        # neighbouring chunk back into the selectbox's session-state key.
        # NOTE(review): assigning to st.session_state.chunk_selector after the
        # selectbox with key="chunk_selector" has been instantiated raises a
        # StreamlitAPIException in recent Streamlit versions — confirm against
        # the deployed Streamlit version.
        nav_col1, nav_col2 = st.columns(2)
        with nav_col1:
            if st.button("⬅️ Previous Chunk", use_container_width=True):
                if selected_chunk_title and "No chunks available" not in selected_chunk_title:
                    current_id = int(selected_chunk_title.split(':')[0].strip())
                    new_id = max(0, current_id - 1)
                    new_chunk = chunk_manager.get_chunk_by_id(new_id)
                    if new_chunk:
                        st.session_state.chunk_selector = f"{new_chunk['id']}: {new_chunk['title']}"

        with nav_col2:
            if st.button("Next Chunk ➡️", use_container_width=True):
                if selected_chunk_title and "No chunks available" not in selected_chunk_title:
                    current_id = int(selected_chunk_title.split(':')[0].strip())
                    new_id = min(len(chunk_manager.get_chunks()) - 1, current_id + 1)
                    new_chunk = chunk_manager.get_chunk_by_id(new_id)
                    if new_chunk:
                        st.session_state.chunk_selector = f"{new_chunk['id']}: {new_chunk['title']}"

    # Get the currently selected chunk
    selected_chunk = None
    if selected_chunk_title and "No chunks available" not in selected_chunk_title:
        current_id = int(selected_chunk_title.split(':')[0].strip())
        selected_chunk = chunk_manager.get_chunk_by_id(current_id)

    if selected_chunk:
        # Title is derived automatically by the parser; read-only here.
        st.text_input(
            label="Chunk Title (Auto-detected)",
            value=selected_chunk["title"],
            disabled=True
        )

        chunk_content_editor = st.text_area(
            label="Chunk Content",
            value=selected_chunk["content"],
            height=250,
            key=f"editor_{selected_chunk['id']}"  # Unique key to prevent state loss
        )

        # Per-chunk stats with red/green target colouring (HTML spans).
        st.markdown(
            chunk_manager.format_chunk_stats(selected_chunk['stats']),
            unsafe_allow_html=True
        )

        update_col, delete_col, _ = st.columns([1, 1, 3])
        with update_col:
            if st.button("Update Selected Chunk", use_container_width=True):
                chunk_manager.update_chunk_content(selected_chunk['id'], chunk_content_editor)
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' updated successfully!"
                # Force a re-render to update the dropdown with the new title
                st.session_state.chunk_selector = f"{selected_chunk['id']}: {chunk_manager.get_chunk_by_id(selected_chunk['id'])['title']}"

        with delete_col:
            if st.button("Delete Selected Chunk", use_container_width=True):
                chunk_manager.delete_chunk(selected_chunk['id'])
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' deleted successfully!"
                # After deletion, fall back to the first remaining chunk (ids
                # are re-indexed by delete_chunk) or clear the selection.
                if chunk_manager.get_chunks():
                    st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                else:
                    st.session_state.chunk_selector = None


    else:
        # Disabled placeholders shown before any URL has been processed.
        st.text_input("Chunk Title (Auto-detected)", "Title of the selected chunk", disabled=True)
        st.text_area("Chunk Content", "Content of the selected chunk will appear here for editing.", height=250, disabled=True)
        st.markdown("Chunk statistics will appear here.")

    st.markdown("---")
    st.markdown("## Final Compiled Markdown")

    # Compilation is explicit (button-driven); the result persists in
    # session_state so it survives reruns.
    if st.button("Compile All Chunks", use_container_width=True):
        st.session_state.final_markdown = chunk_manager.get_final_markdown()

    st.text_area(
        label="Compiled Markdown",
        value=st.session_state.final_markdown,
        height=400,
        key="final_markdown_output",
        disabled=False
    )
551
with tab2:
    st.markdown("## Document Summary Statistics")
    # Aggregated stats across all chunks (HTML allowed for colour spans).
    st.markdown(chunk_manager.get_document_summary_stats(), unsafe_allow_html=True)

    st.markdown("---")
    st.markdown("## Content Targets")
    st.markdown("Adjust these targets to guide your writing and see visual feedback in the chunk selector (green=good, red=needs attention).")

    # A form batches the four inputs so targets only apply on explicit submit,
    # avoiding a rerun per keystroke.
    with st.form("targets_form"):
        col1, col2 = st.columns(2)
        with col1:
            target_flesch_min_input = st.number_input("Min Flesch Reading Ease", value=float(chunk_manager.target_flesch_min))
            target_min_chunk_words_input = st.number_input("Min Chunk Words", value=chunk_manager.target_min_chunk_words)
        with col2:
            target_grade_max_input = st.number_input("Max Flesch-Kincaid Grade", value=float(chunk_manager.target_grade_max))
            target_max_chunk_words_input = st.number_input("Max Chunk Words", value=chunk_manager.target_max_chunk_words)

        submitted = st.form_submit_button("Set New Targets", use_container_width=True)
        if submitted:
            chunk_manager.set_targets(
                target_flesch_min_input,
                target_grade_max_input,
                int(target_min_chunk_words_input),
                int(target_max_chunk_words_input)
            )
            st.session_state.status_message = "Target settings updated."
            # Rerun so the new targets are reflected immediately in tab1's
            # colour-coded stats and the status banner.
            st.rerun()