# Chunk-Powered Webpage Editor — Streamlit app (Hugging Face Space)
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from html_to_markdown import convert_to_markdown | |
| import re | |
| from llama_index.core.node_parser import MarkdownNodeParser | |
| from llama_index.core.schema import Document, MetadataMode | |
| import textstat # For readability metrics | |
class WebpageContentProcessor:
    """
    Handles fetching, converting, and parsing webpage content into structured chunks.
    Adheres to the Single Responsibility Principle (SRP) for content processing.
    """

    # CSS selectors / bare tag names removed from inside the identified main content.
    _UNWANTED_SELECTORS = [
        'nav', 'header', 'footer', 'aside', 'iframe', 'form', 'button', 'input',
        'textarea', 'svg', 'canvas', 'audio', 'video', 'picture', 'source', 'track',
        'map', 'area', 'embed', 'object', 'param', 'applet', 'bgsound', 'frame',
        'frameset', 'noframes', 'template', 'slot', 'portal', 'datalist', 'keygen',
        'output', 'progress', 'meter', 'details', 'summary', 'dialog', 'menu',
        'menuitem', 'command', 'hr', 'figure', 'figcaption', 'cite',
        '.social-share', '.comments', '.related-posts', '.pagination',
        '.breadcrumbs', '.pop-up', '.modal', '.overlay', '.cookie-consent',
        '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
        '[role="complementary"]', '[role="search"]', '[role="menubar"]', '[role="toolbar"]',
        '[class*="utility"]', '[class*="global-nav"]', '[class*="skip"]', '[class*="toast"]',
        '[class*="announcement"]', '[class*="fixed-bottom"]', '[class*="fixed-top"]',
        '[id*="promo"]', '[id*="ad"]', '[id*="banner"]', '[id*="popup"]', '[id*="modal"]',
        '[id*="overlay"]', '[id*="cookie"]', '[id*="skip"]', '[id*="navbar"]', '[id*="menu"]',
        '.hidden', '.visually-hidden',
        '.no-print', '.print-hide',
        '.wp-block-navigation', '.wp-block-group.is-style-stripes',
        '[class*="column"]', '[class*="grid"]'
    ]

    # Class names that mark a likely article wrapper when walking up from an H1.
    _CONTENT_WRAPPER_CLASSES = ('content', 'post-body', 'article-content',
                                'entry-content', 'main-content')

    def fetch_and_convert_to_markdown(self, url: str) -> str:
        """
        Fetches HTML content from a given URL, attempts to isolate the main content,
        removes common boilerplate, and converts to Markdown.

        Returns the Markdown string on success, or a human-readable message
        containing "Error" on failure (callers test for that substring).
        """
        try:
            response = requests.get(url, timeout=10)  # timeout so a dead server can't hang the app
            response.raise_for_status()  # raise HTTPError for 4xx/5xx responses
            soup = BeautifulSoup(response.text, 'html.parser')
            # Aggressive initial removal of tags that are never content.
            for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                for element in soup.find_all(tag_name):
                    element.decompose()
            content_for_conversion = self._locate_main_content(soup)
            if not content_for_conversion:
                return "Error: Could not identify main content for conversion."
            # Selective boilerplate removal within the *identified* main content.
            self._strip_boilerplate(content_for_conversion)
            markdown_output = convert_to_markdown(str(content_for_conversion))
            # Post-processing: collapse blank-line runs, drop empty bullets and
            # stray runs of bold/emphasis markers left behind by the conversion.
            markdown_output = re.sub(r'\n\s*\n\s*\n+', '\n\n', markdown_output)
            markdown_output = re.sub(r'^\s*[\*\-]\s*$', '', markdown_output, flags=re.MULTILINE)
            markdown_output = re.sub(r'\*{3,}', '', markdown_output)
            return markdown_output.strip()
        except requests.exceptions.Timeout:
            return "Error: Request timed out. The server took too long to respond."
        except requests.exceptions.RequestException as e:
            return f"Error fetching URL: {e}. Please check the URL or your internet connection."
        except Exception as e:
            return f"An unexpected error occurred during HTML conversion: {e}"

    def _locate_main_content(self, soup):
        """
        Return the element most likely to hold the page's main content, or None.

        Priority: semantic containers (<article>, <main>, div.main-content,
        div[role=main]) -> a content wrapper found by walking up from the first
        <h1> -> the <h1> plus its following siblings -> <body>.
        """
        semantic = soup.find('article') or soup.find('main') or \
                   soup.find('div', class_='main-content') or \
                   soup.find('div', {'role': 'main'})
        if semantic:
            return semantic
        first_h1 = soup.find('h1')
        if not first_h1:
            return soup.body  # ultimate fallback: use the entire body
        # Check up to 5 parent levels above the H1 for a suitable content wrapper.
        candidate = first_h1.parent
        for _ in range(5):
            if candidate is None:
                break
            is_wrapper_tag = candidate.name in ['article', 'main', 'section', 'div']
            has_content_class = any(cls in candidate.get('class', [])
                                    for cls in self._CONTENT_WRAPPER_CLASSES)
            # Parenthesized explicitly; the original relied on and/or precedence.
            if (is_wrapper_tag and has_content_class) or candidate.get('role') == 'main':
                return candidate
            candidate = candidate.parent
        # No clear wrapper found: take the H1 and its following siblings.
        # BUG FIX: snapshot the siblings BEFORE moving any node — append()
        # detaches an element, so reading next_sibling after appending (as the
        # original did) always yielded None and dropped every sibling.
        following = list(first_h1.next_siblings)
        fragment = BeautifulSoup('', 'html.parser')
        fragment.append(first_h1)
        for node in following:
            fragment.append(node)
        return fragment

    def _strip_boilerplate(self, content) -> None:
        """Remove boilerplate elements in place from the identified content element."""
        for selector in self._UNWANTED_SELECTORS:
            if re.match(r'^[a-zA-Z0-9]+$', selector):
                # Bare tag name: plain find_all avoids CSS-selector parsing.
                for element in content.find_all(selector):
                    element.decompose()
            else:
                for element in content.select(selector):
                    element.decompose()

    def parse_markdown_into_chunks(self, markdown_content: str) -> list:
        """
        Parses Markdown content into LlamaIndex nodes (chunks) and extracts a
        title and body for each.

        Returns a list of dicts: {'id', 'title', 'content', 'original_node'};
        an empty list when the input is empty or is an error message.
        """
        # BUG FIX: the original recognised only two of the fetcher's four error
        # strings; match any "Error..."-prefixed message as well.
        if not markdown_content \
                or markdown_content.lstrip().startswith("Error") \
                or "An unexpected error occurred" in markdown_content:
            return []
        doc = Document(text=markdown_content, metadata={"filename": "webpage_content"})
        parser = MarkdownNodeParser(include_metadata=True)
        nodes = parser.get_nodes_from_documents([doc])
        print(f"✅ Parsed {len(nodes)} nodes from Markdown.")  # Debug print
        structured_chunks = []
        for chunk_id, node in enumerate(nodes):
            pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
            heading_title, content_text = self._split_heading(pure_text_content)
            structured_chunks.append({
                "id": chunk_id,
                "title": heading_title,
                "content": content_text,
                "original_node": node  # keep reference to the original LlamaIndex node
            })
        return structured_chunks

    @staticmethod
    def _split_heading(text: str) -> tuple:
        """
        Split a chunk's text into (title, body).

        Uses a leading Markdown heading when present; otherwise the first line
        (truncated to 70 chars) serves as the title. Placeholder titles
        "[Untitled Section]" / "[Empty Section]" mark missing pieces.
        """
        heading_match = re.match(r"^(#+)\s*(.*)", text)
        if heading_match:
            title = heading_match.group(2).strip()
            body = text[len(heading_match.group(0)):].strip()
            if not title:
                title = "[Untitled Section]"
            return title, body
        first_line = text.split('\n')[0].strip()
        title = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line
        if not title or not text:
            title = "[Empty Section]"
        return title, text
| class ChunkManager: | |
| """ | |
| Manages the collection of content chunks, their statistics, and target settings. | |
| Adheres to SRP for chunk data management and OCP by allowing new statistics | |
| or formatting without changing core chunk operations. | |
| """ | |
| def __init__(self): | |
| self._chunks = [] | |
| self.target_flesch_min = 60 | |
| self.target_grade_max = 8 | |
| self.target_min_chunk_words = 50 | |
| self.target_max_chunk_words = 500 | |
| def set_chunks(self, chunks: list): | |
| """Sets the internal list of chunks and calculates their initial statistics.""" | |
| self._chunks = [] | |
| for chunk in chunks: | |
| chunk['stats'] = self._calculate_chunk_stats(chunk['content']) | |
| self._chunks.append(chunk) | |
| def get_chunks(self) -> list: | |
| """Returns the current list of processed chunks.""" | |
| return self._chunks | |
| def _calculate_chunk_stats(self, text: str) -> dict: | |
| """ | |
| Calculates various linguistic statistics for a given text chunk. | |
| (Private helper method, SRP for stats calculation) | |
| """ | |
| stats = {} | |
| cleaned_text = re.sub(r'#+\s*', '', text) | |
| cleaned_text = re.sub(r'[\*\-]\s*', '', cleaned_text) | |
| cleaned_text = re.sub(r'\n\s*\n+', ' ', cleaned_text).strip() | |
| stats['word_count'] = textstat.lexicon_count(cleaned_text, removepunct=True) | |
| stats['char_count'] = len(cleaned_text) | |
| stats['sentence_count'] = textstat.sentence_count(cleaned_text) | |
| if stats['sentence_count'] > 0: | |
| stats['avg_sentence_length'] = stats['word_count'] / stats['sentence_count'] | |
| else: | |
| stats['avg_sentence_length'] = 0 | |
| stats['paragraph_count'] = cleaned_text.count('\n\n') + 1 if cleaned_text else 0 | |
| try: | |
| stats['flesch_reading_ease'] = textstat.flesch_reading_ease(cleaned_text) | |
| except Exception: | |
| stats['flesch_reading_ease'] = 0 | |
| try: | |
| stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(cleaned_text) | |
| except Exception: | |
| stats['flesch_kincaid_grade'] = 0 | |
| try: | |
| stats['gunning_fog_score'] = textstat.gunning_fog(cleaned_text) | |
| except Exception: | |
| stats['gunning_fog_score'] = 0 | |
| return stats | |
| def format_chunk_stats(self, stats: dict) -> str: | |
| """ | |
| Formats chunk statistics into a readable string, including explanations for readability scores. | |
| Adheres to SRP for formatting. | |
| """ | |
| flesch_ease_color = "red" if stats['flesch_reading_ease'] < self.target_flesch_min else "green" | |
| kincaid_grade_color = "red" if stats['flesch_kincaid_grade'] > self.target_grade_max else "green" | |
| word_count_color = "red" if not (self.target_min_chunk_words <= stats['word_count'] <= self.target_max_chunk_words) else "green" | |
| stats_str = "#### Chunk Statistics:\n" | |
| stats_str += f"- **Word Count:** <span style='color:{word_count_color}'>{stats['word_count']}</span> (Target: {self.target_min_chunk_words}-{self.target_max_chunk_words})\n" | |
| stats_str += f"- **Character Count:** {stats['char_count']}\n" | |
| stats_str += f"- **Sentence Count:** {stats['sentence_count']}\n" | |
| stats_str += f"- **Avg Sentence Length:** {stats['avg_sentence_length']:.2f} words\n" | |
| stats_str += f"- **Paragraph Count:** {stats['paragraph_count']}\n" | |
| stats_str += f"- **Flesch Reading Ease:** <span style='color:{flesch_ease_color}'>{stats['flesch_reading_ease']:.2f}</span> (Higher scores mean easier to read.)\n" | |
| stats_str += f"- **Flesch-Kincaid Grade:** <span style='color:{kincaid_grade_color}'>{stats['flesch_kincaid_grade']:.2f}</span> (Indicates the U.S. grade level needed to understand the text.)\n" | |
| stats_str += f"- **Gunning Fog Score:** {stats['gunning_fog_score']:.2f}\n" | |
| return stats_str | |
| def get_document_summary_stats(self) -> str: | |
| """ | |
| Aggregates statistics for the entire document across all managed chunks. | |
| Adheres to SRP for document-level summary. | |
| """ | |
| if not self._chunks: | |
| return "No document loaded to generate statistics." | |
| total_words = 0 | |
| total_chars = 0 | |
| total_sentences = 0 | |
| total_paragraphs = 0 | |
| all_content_text = "" | |
| for chunk in self._chunks: | |
| content_text_for_stats = chunk['content'] | |
| # Re-calculate stats for each chunk content to ensure summary is up-to-date | |
| current_chunk_stats = self._calculate_chunk_stats(content_text_for_stats) | |
| total_words += current_chunk_stats['word_count'] | |
| total_chars += current_chunk_stats['char_count'] | |
| total_sentences += current_chunk_stats['sentence_count'] | |
| total_paragraphs += current_chunk_stats['paragraph_count'] | |
| all_content_text += content_text_for_stats + "\n\n" | |
| doc_stats_str = "## Overall Document Statistics:\n" | |
| doc_stats_str += f"- **Total Chunks:** {len(self._chunks)}\n" | |
| doc_stats_str += f"- **Total Words:** {total_words}\n" | |
| doc_stats_str += f"- **Total Characters:** {total_chars}\n" | |
| doc_stats_str += f"- **Total Sentences:** {total_sentences}\n" | |
| doc_stats_str += f"- **Total Paragraphs:** {total_paragraphs}\n" | |
| if len(self._chunks) > 0: | |
| doc_stats_str += f"- **Average Words per Chunk:** {total_words / len(self._chunks):.2f}\n" | |
| if all_content_text.strip(): | |
| overall_stats = self._calculate_chunk_stats(all_content_text) | |
| doc_stats_str += f"- **Overall Flesch Reading Ease:** {overall_stats['flesch_reading_ease']:.2f}\n" | |
| doc_stats_str += f"- **Overall Flesch-Kincaid Grade Level:** {overall_stats['flesch_kincaid_grade']:.2f}\n" | |
| doc_stats_str += f"- **Overall Gunning Fog Score:** {overall_stats['gunning_fog_score']:.2f}\n" | |
| doc_stats_str += f"- **Overall Average Sentence Length:** {overall_stats['avg_sentence_length']:.2f} words\n" | |
| else: | |
| doc_stats_str += "- No content available for overall readability metrics.\n" | |
| return doc_stats_str | |
| def get_chunk_by_id(self, chunk_id: int) -> dict | None: | |
| """Retrieves a chunk by its ID.""" | |
| return next((chunk for chunk in self._chunks if chunk["id"] == chunk_id), None) | |
| def get_chunk_titles_for_dropdown(self) -> list: | |
| """Generates dropdown choices using plain text (no HTML).""" | |
| dropdown_choices = [] | |
| for chunk in self._chunks: | |
| title = chunk['title'] | |
| dropdown_choices.append(f"{chunk['id']}: {title}") | |
| return dropdown_choices | |
| def update_chunk_content(self, chunk_id: int, new_content: str) -> bool: | |
| """ | |
| Updates the content of a chunk, recalculates its stats, and updates its title if needed. | |
| Returns True if successful, False otherwise. | |
| """ | |
| for chunk in self._chunks: | |
| if chunk["id"] == chunk_id: | |
| chunk["content"] = new_content | |
| chunk["stats"] = self._calculate_chunk_stats(new_content) | |
| # Update chunk title if it was a placeholder or empty | |
| if chunk["title"].startswith("[") and chunk["title"].endswith("]") or not chunk["title"]: | |
| first_line = new_content.split('\n')[0].strip() | |
| chunk["title"] = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line | |
| if not chunk["title"]: | |
| chunk["title"] = "[Empty Section]" | |
| elif not new_content: | |
| chunk["title"] = "[Empty Section]" | |
| return True | |
| return False | |
| def delete_chunk(self, chunk_id: int) -> bool: | |
| """ | |
| Deletes a chunk by ID and re-indexes remaining chunks. | |
| Returns True if successful, False otherwise. | |
| """ | |
| initial_chunk_count = len(self._chunks) | |
| self._chunks = [chunk for chunk in self._chunks if chunk["id"] != chunk_id] | |
| if len(self._chunks) == initial_chunk_count: | |
| return False # Chunk not found | |
| # Re-index IDs to be sequential again | |
| for i, chunk in enumerate(self._chunks): | |
| chunk['id'] = i | |
| return True | |
| def get_final_markdown(self) -> str: | |
| """Compiles all current chunks into a single Markdown string.""" | |
| final_md = "" | |
| if not self._chunks: | |
| return "No content to compile. Please process a URL first." | |
| for chunk in self._chunks: | |
| # Use H1 heading if title is meaningful | |
| if not chunk["title"].startswith("[") and chunk["title"]: | |
| final_md += f"# {chunk['title']}\n\n" | |
| final_md += f"{chunk['content']}\n\n" | |
| return final_md.strip() | |
| def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int): | |
| """Sets the global readability and word count targets.""" | |
| self.target_flesch_min = flesch_min | |
| self.target_grade_max = grade_max | |
| self.target_min_chunk_words = min_words | |
| self.target_max_chunk_words = max_words | |
| # Recalculate stats for all chunks to reflect new targets in color coding (if displayed) | |
| for chunk in self._chunks: | |
| chunk['stats'] = self._calculate_chunk_stats(chunk['content']) | |
# --- Streamlit UI Definition ---
st.set_page_config(layout="wide", page_title="Chunk-Powered Webpage Editor")
# Initialize session state (persists across Streamlit reruns between interactions).
if 'chunk_manager' not in st.session_state:
    st.session_state.chunk_manager = ChunkManager()
if 'content_processor' not in st.session_state:
    st.session_state.content_processor = WebpageContentProcessor()
if 'status_message' not in st.session_state:
    st.session_state.status_message = ""
if 'chunk_selector' not in st.session_state:
    st.session_state.chunk_selector = None
if 'chunk_content_editor' not in st.session_state:
    st.session_state.chunk_content_editor = ""
if 'final_markdown' not in st.session_state:
    st.session_state.final_markdown = "Click 'Compile All Chunks' to see the final document with your edits."
# Local aliases for the session-scoped manager objects.
content_processor = st.session_state.content_processor
chunk_manager = st.session_state.chunk_manager
st.markdown("# <center>✨ Chunk-Powered Webpage Editor ✨</center>", unsafe_allow_html=True)
st.info(
    "ℹ️ **Please Note:**\n\n"
    "- Some URLs may be inaccessible due to restrictive server policies (e.g., firewalls or bot detection).\n"
    "- This is an early version of the app, and you may encounter some bugs."
)
# Intro / attribution banner (raw HTML, hence unsafe_allow_html).
st.markdown("""Enter a URL, fetch its content, and break it into editable 'chunks'. Review statistics, set targets, edit chunks, and compile your final Markdown.<div style="font-size: 0.9em; margin-bottom: 12px;">
Inspired by <a href="https://www.linkedin.com/pulse/understanding-chunking-google-ai-mode-practical-content-volpini-zseaf/" target="_blank">Andrea Volpini</a></div><div style="display: flex; justify-content: flex-start; align-items: center; gap: 16px;">
<span>Runs best on Desktop. App created by <a href="https://www.linkedin.com/in/emilijagjorgjevska/" target="_blank">Emilija Gjorgjevska</a></span>
<a href="https://buymeacoffee.com/emiliagjorgjevska" target="_blank">
<img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 30px;">
</a></div><br>""", unsafe_allow_html=True)
# --- URL Input and Processing ---
col1, col2 = st.columns([4, 1])
with col1:
    url_input = st.text_input(
        label="Enter Webpage URL",
        placeholder="e.g., https://www.llamaindex.ai/blog/what-is-llamaindex",
        key="url_input"
    )
with col2:
    st.write("")  # Spacer
    st.write("")  # Spacer
    process_button = st.button("Process URL", use_container_width=True)
if st.session_state.status_message:
    st.info(st.session_state.status_message)
if process_button:
    if not url_input:
        st.session_state.status_message = "Please enter a URL to process."
    else:
        with st.spinner("Processing URL..."):
            markdown_content = content_processor.fetch_and_convert_to_markdown(url_input)
            # The fetcher signals failure via an "Error..." string rather than raising.
            if "Error" in markdown_content:
                chunk_manager.set_chunks([])
                st.session_state.status_message = markdown_content
            else:
                chunks = content_processor.parse_markdown_into_chunks(markdown_content)
                chunk_manager.set_chunks(chunks)
                st.session_state.status_message = "URL processed successfully!" if chunks else "URL processed, but no content chunks could be extracted."
                # Pre-select the first chunk in the editor dropdown (None when empty).
                if chunks:
                    st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                else:
                    st.session_state.chunk_selector = None
# --- Tabs for Editor and Overview ---
tab1, tab2 = st.tabs(["Editor", "Document Overview & Targets"])
with tab1:
    st.markdown("## Edit Chunks Individually")
    chunk_selector_options = chunk_manager.get_chunk_titles_for_dropdown()
    if chunk_selector_options:
        try:
            # Find the index of the currently selected item to handle updates
            # (the stored selection may be stale after edits/deletes).
            current_selection_index = chunk_selector_options.index(st.session_state.chunk_selector)
        except (ValueError, TypeError):
            current_selection_index = 0
        selected_chunk_title = st.selectbox(
            label="Select Chunk to Edit",
            options=chunk_selector_options,
            index=current_selection_index,
            key="chunk_selector"
        )
    else:
        # No chunks yet: render a disabled placeholder selector.
        selected_chunk_title = st.selectbox(
            label="Select Chunk to Edit",
            options=["No chunks available"],
            disabled=True
        )
    # Resolve the selection string ("<id>: <title>") back to the chunk dict.
    selected_chunk = None
    if selected_chunk_title and "No chunks available" not in selected_chunk_title:
        current_id = int(selected_chunk_title.split(':')[0].strip())
        selected_chunk = chunk_manager.get_chunk_by_id(current_id)
    if selected_chunk:
        st.text_input(
            label="Chunk Title (Auto-detected)",
            value=selected_chunk["title"],
            disabled=True
        )
        chunk_content_editor = st.text_area(
            label="Chunk Content",
            value=selected_chunk["content"],
            height=250,
            key=f"editor_{selected_chunk['id']}"  # Unique key to prevent state loss
        )
        # Per-chunk stats with red/green target colouring (HTML spans).
        st.markdown(
            chunk_manager.format_chunk_stats(selected_chunk['stats']),
            unsafe_allow_html=True
        )
        update_col, delete_col, _ = st.columns([1, 1, 3])
        with update_col:
            if st.button("Update Selected Chunk", use_container_width=True):
                chunk_manager.update_chunk_content(selected_chunk['id'], chunk_content_editor)
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' updated successfully!"
                # Force a re-render to update the dropdown with the new title
                st.session_state.chunk_selector = f"{selected_chunk['id']}: {chunk_manager.get_chunk_by_id(selected_chunk['id'])['title']}"
        with delete_col:
            if st.button("Delete Selected Chunk", use_container_width=True):
                chunk_manager.delete_chunk(selected_chunk['id'])
                st.session_state.status_message = f"Chunk '{selected_chunk_title}' deleted successfully!"
                # Fall back to the first remaining chunk (or none) after deletion.
                if chunk_manager.get_chunks():
                    st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                else:
                    st.session_state.chunk_selector = None
    else:
        # Disabled placeholders shown before any URL has been processed.
        st.text_input("Chunk Title (Auto-detected)", "Title of the selected chunk", disabled=True)
        st.text_area("Chunk Content", "Content of the selected chunk will appear here for editing.", height=250, disabled=True)
        st.markdown("Chunk statistics will appear here.")
    st.markdown("---")
    st.markdown("## Final Compiled Markdown")
    if st.button("Compile All Chunks", use_container_width=True):
        st.session_state.final_markdown = chunk_manager.get_final_markdown()
    st.text_area(
        label="Compiled Markdown",
        value=st.session_state.final_markdown,
        height=400,
        key="final_markdown_output",
        disabled=False
    )
with tab2:
    st.markdown("## Document Summary Statistics")
    st.markdown(chunk_manager.get_document_summary_stats(), unsafe_allow_html=True)
    st.markdown("---")
    st.markdown("## Content Targets")
    st.markdown("Adjust these targets to guide your writing and see visual feedback in the chunk selector (green=good, red=needs attention).")
    # A form batches the inputs so the app only reruns on submit.
    with st.form("targets_form"):
        col1, col2 = st.columns(2)
        with col1:
            target_flesch_min_input = st.number_input("Min Flesch Reading Ease", value=float(chunk_manager.target_flesch_min))
            target_min_chunk_words_input = st.number_input("Min Chunk Words", value=chunk_manager.target_min_chunk_words)
        with col2:
            target_grade_max_input = st.number_input("Max Flesch-Kincaid Grade", value=float(chunk_manager.target_grade_max))
            target_max_chunk_words_input = st.number_input("Max Chunk Words", value=chunk_manager.target_max_chunk_words)
        submitted = st.form_submit_button("Set New Targets", use_container_width=True)
        if submitted:
            chunk_manager.set_targets(
                target_flesch_min_input,
                target_grade_max_input,
                int(target_min_chunk_words_input),
                int(target_max_chunk_words_input)
            )
            st.session_state.status_message = "Target settings updated."
            st.rerun()