Spaces:

Em4e
/

chunk-based-text-editor

Sleeping

App Files Files Community

Em4e committed on Jun 9, 2025

Commit

35de2ed

verified ·

1 Parent(s): 5c1ddc7

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -120

app.py CHANGED Viewed

@@ -25,9 +25,7 @@ class WebpageContentProcessor:
             response = requests.get(url, timeout=10) # Add a timeout for robustness
             response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
             html_content = response.text
             soup = BeautifulSoup(html_content, 'html.parser')
             # Aggressive initial removal of script, style, and meta tags that are never content.
             for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                 for element in soup.find_all(tag_name):
@@ -41,23 +39,23 @@ class WebpageContentProcessor:
                                      soup.find('div', {'role': 'main'})
             # Fallback logic if main content container wasn't found
-            if not content_for_conversion:
-                first_h1 = soup.find('h1')
-                if first_h1:
                     candidate_container = first_h1.parent
                     found_main_wrapper_via_h1_parent = False
                     # Check up to 5 parent levels for a suitable content wrapper
-                    for _ in range(5):
-                        if candidate_container is None:
                             break
-                        if candidate_container.name in ['article', 'main', 'section', 'div'] and \
                            any(cls in candidate_container.get('class', []) for cls in ['content', 'post-body', 'article-content', 'entry-content', 'main-content']) or \
                            candidate_container.get('role') == 'main':
                             content_for_conversion = candidate_container
                             found_main_wrapper_via_h1_parent = True
                             break
-                        candidate_container = candidate_container.parent
                     # If no clear wrapper found via H1's parent, take H1 and its direct siblings as a fallback
                     if not found_main_wrapper_via_h1_parent:
                         temp_soup = BeautifulSoup('', 'html.parser')
@@ -67,7 +65,7 @@ class WebpageContentProcessor:
                             temp_soup.append(current_element)
                             current_element = current_element.next_sibling
                         content_for_conversion = temp_soup
-                else:
                     # Ultimate fallback: use the entire body if no specific content tags or H1 found
                     content_for_conversion = soup.body
@@ -81,7 +79,7 @@ class WebpageContentProcessor:
                 'map', 'area', 'embed', 'object', 'param', 'applet', 'bgsound', 'frame',
                 'frameset', 'noframes', 'template', 'slot', 'portal', 'datalist', 'keygen',
                 'output', 'progress', 'meter', 'details', 'summary', 'dialog', 'menu',
-                'menuitem', 'command', 'hr', 'figure', 'figcaption', 'cite',
                 '.social-share', '.comments', '.related-posts', '.pagination',
                 '.breadcrumbs', '.pop-up', '.modal', '.overlay', '.cookie-consent',
                 '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
@@ -90,26 +88,26 @@ class WebpageContentProcessor:
                 '[class*="announcement"]', '[class*="fixed-bottom"]', '[class*="fixed-top"]',
                 '[id*="promo"]', '[id*="ad"]', '[id*="banner"]', '[id*="popup"]', '[id*="modal"]',
                 '[id*="overlay"]', '[id*="cookie"]', '[id*="skip"]', '[id*="navbar"]', '[id*="menu"]',
-                '.hidden', '.visually-hidden',
-                '.no-print', '.print-hide',
-                '.wp-block-navigation', '.wp-block-group.is-style-stripes',
-                '[class*="column"]', '[class*="grid"]'
-            ]
             for selector in unwanted_elements_in_content:
-                if re.match(r'^[a-zA-Z0-9]+$', selector):
-                    for element in content_for_conversion.find_all(selector):
                         element.decompose()
-                else:
-                    for element in content_for_conversion.select(selector):
                         element.decompose()
             markdown_output = convert_to_markdown(str(content_for_conversion))
             # Post-processing: Clean up resulting Markdown
             markdown_output = re.sub(r'\n\s*\n\s*\n+', '\n\n', markdown_output)
             markdown_output = re.sub(r'^\s*[\*\-]\s*$', '', markdown_output, flags=re.MULTILINE)
-            markdown_output = re.sub(r'\*{3,}', '', markdown_output)
             markdown_output = markdown_output.strip()
             return markdown_output
@@ -135,21 +133,19 @@ class WebpageContentProcessor:
         print(f"✅ Parsed {len(nodes)} nodes from Markdown.") # Debug print
         structured_chunks = []
-        current_id = 0
         for node in nodes:
             pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
             heading_title = ""
             content_text = pure_text_content
             heading_match = re.match(r"^(#+)\s*(.*)", pure_text_content)
             if heading_match:
                 heading_title = heading_match.group(2).strip()
                 content_text = pure_text_content[len(heading_match.group(0)):].strip()
-                if not heading_title:
-                    heading_title = "[Untitled Section]"
             else:
                 first_line = content_text.split('\n')[0].strip()
                 heading_title = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line
@@ -165,7 +161,7 @@ class WebpageContentProcessor:
                 "original_node": node # Keep reference to the original LlamaIndex node
             })
             current_id += 1
         return structured_chunks
 class ChunkManager:
@@ -198,14 +194,14 @@ class ChunkManager:
         (Private helper method, SRP for stats calculation)
         """
         stats = {}
-        cleaned_text = re.sub(r'#+\s*', '', text)
-        cleaned_text = re.sub(r'[\*\-]\s*', '', cleaned_text)
-        cleaned_text = re.sub(r'\n\s*\n+', ' ', cleaned_text).strip()
         stats['word_count'] = textstat.lexicon_count(cleaned_text, removepunct=True)
         stats['char_count'] = len(cleaned_text)
         stats['sentence_count'] = textstat.sentence_count(cleaned_text)
         if stats['sentence_count'] > 0:
             stats['avg_sentence_length'] = stats['word_count'] / stats['sentence_count']
         else:
@@ -217,17 +213,16 @@ class ChunkManager:
             stats['flesch_reading_ease'] = textstat.flesch_reading_ease(cleaned_text)
         except Exception:
             stats['flesch_reading_ease'] = 0
         try:
             stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(cleaned_text)
         except Exception:
             stats['flesch_kincaid_grade'] = 0
         try:
             stats['gunning_fog_score'] = textstat.gunning_fog(cleaned_text)
         except Exception:
             stats['gunning_fog_score'] = 0
         return stats
     def format_chunk_stats(self, stats: dict) -> str:
@@ -262,7 +257,7 @@ class ChunkManager:
         total_chars = 0
         total_sentences = 0
         total_paragraphs = 0
         all_content_text = ""
         for chunk in self._chunks:
             content_text_for_stats = chunk['content']
@@ -280,10 +275,10 @@ class ChunkManager:
         doc_stats_str += f"- **Total Characters:** {total_chars}\n"
         doc_stats_str += f"- **Total Sentences:** {total_sentences}\n"
         doc_stats_str += f"- **Total Paragraphs:** {total_paragraphs}\n"
         if len(self._chunks) > 0:
             doc_stats_str += f"- **Average Words per Chunk:** {total_words / len(self._chunks):.2f}\n"
         if all_content_text.strip():
             overall_stats = self._calculate_chunk_stats(all_content_text)
             doc_stats_str += f"- **Overall Flesch Reading Ease:** {overall_stats['flesch_reading_ease']:.2f}\n"
@@ -292,7 +287,6 @@ class ChunkManager:
             doc_stats_str += f"- **Overall Average Sentence Length:** {overall_stats['avg_sentence_length']:.2f} words\n"
         else:
             doc_stats_str += "- No content available for overall readability metrics.\n"
         return doc_stats_str
     def get_chunk_by_id(self, chunk_id: int) -> dict | None:
@@ -336,25 +330,21 @@ class ChunkManager:
         self._chunks = [chunk for chunk in self._chunks if chunk["id"] != chunk_id]
         if len(self._chunks) == initial_chunk_count:
             return False # Chunk not found
         # Re-index IDs to be sequential again
         for i, chunk in enumerate(self._chunks):
             chunk['id'] = i
         return True
     def get_final_markdown(self) -> str:
         """Join every stored chunk into one Markdown document.

         Returns:
             A fixed placeholder message when there are no chunks. Otherwise
             the chunks' contents in stored order, each followed by a blank
             line and preceded by an H1 line when the chunk's title is
             non-empty and does not start with "[". Leading and trailing
             whitespace of the combined text is stripped.
         """
         # Nothing to compile: hand back the placeholder text right away.
         if not self._chunks:
             return "No content to compile. Please process a URL first."
         pieces = []
         for entry in self._chunks:
             heading = entry["title"]
             # Titles starting with "[" and empty titles get no heading line.
             if not heading.startswith("[") and heading:
                 pieces.append(f"# {heading}\n\n")
             pieces.append(f"{entry['content']}\n\n")
         return "".join(pieces).strip()
     def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
@@ -365,7 +355,7 @@ class ChunkManager:
         self.target_max_chunk_words = max_words
         # Recalculate stats for all chunks to reflect new targets in color coding (if displayed)
         for chunk in self._chunks:
-            chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
 # --- Streamlit UI Definition ---
 st.set_page_config(layout="wide", page_title="Chunk-Powered Webpage Editor")
@@ -384,35 +374,23 @@ if 'chunk_content_editor' not in st.session_state:
 if 'final_markdown' not in st.session_state:
     st.session_state.final_markdown = "Click 'Compile All Chunks' to see the final document with your edits."
 # Instantiate the managers
 content_processor = st.session_state.content_processor
 chunk_manager = st.session_state.chunk_manager
 st.markdown("# <center>✨ Chunk-Powered Webpage Editor ✨</center>", unsafe_allow_html=True)
-st.markdown("""
-Enter a URL, fetch its content, and break it into editable 'chunks'.
-Review statistics, set targets, edit chunks, and compile your final Markdown.
-<br>
-<div style="font-size: 0.9em; margin-bottom: 12px;">
-    Inspired by <a href="https://www.linkedin.com/pulse/understanding-chunking-google-ai-mode-practical-content-volpini-zseaf/" target="_blank">Andrea Volpini</a>
-</div>
-<div style="display: flex; justify-content: flex-start; align-items: center; gap: 16px;">
     <span>Runs best on Desktop. App created by <a href="https://www.linkedin.com/in/emilijagjorgjevska/" target="_blank">Emilija Gjorgjevska</a></span>
     <a href="https://buymeacoffee.com/emiliagjorgjevska" target="_blank">
         <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 30px;">
-    </a>
-</div>
-<br>
-""", unsafe_allow_html=True)
 # --- URL Input and Processing ---
 col1, col2 = st.columns([4, 1])
 with col1:
     url_input = st.text_input(
-        label="Enter Webpage URL",
         placeholder="e.g., https://www.llamaindex.ai/blog/what-is-llamaindex",
         key="url_input"
     )
@@ -430,7 +408,7 @@ if process_button:
     else:
         with st.spinner("Processing URL..."):
             markdown_content = content_processor.fetch_and_convert_to_markdown(url_input)
             if "Error" in markdown_content:
                 chunk_manager.set_chunks([])
                 st.session_state.status_message = markdown_content
@@ -438,62 +416,38 @@ if process_button:
                 chunks = content_processor.parse_markdown_into_chunks(markdown_content)
                 chunk_manager.set_chunks(chunks)
                 st.session_state.status_message = "URL processed successfully!" if chunks else "URL processed, but no content chunks could be extracted."
                 if chunks:
                     st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                 else:
                     st.session_state.chunk_selector = None
 # --- Tabs for Editor and Overview ---
 tab1, tab2 = st.tabs(["Editor", "Document Overview & Targets"])
 with tab1:
     st.markdown("## Edit Chunks Individually")
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        chunk_selector_options = chunk_manager.get_chunk_titles_for_dropdown()
-        if chunk_selector_options:
-            try:
-                # Find the index of the currently selected item to handle updates
-                current_selection_index = chunk_selector_options.index(st.session_state.chunk_selector)
-            except (ValueError, TypeError):
-                current_selection_index = 0
-            selected_chunk_title = st.selectbox(
-                label="Select Chunk to Edit",
-                options=chunk_selector_options,
-                index=current_selection_index,
-                key="chunk_selector"
-            )
-        else:
-            selected_chunk_title = st.selectbox(
-                label="Select Chunk to Edit",
-                options=["No chunks available"],
-                disabled=True
-            )
-    with col2:
-        nav_col1, nav_col2 = st.columns(2)
-        with nav_col1:
-            if st.button("⬅️ Previous Chunk", use_container_width=True):
-                if selected_chunk_title and "No chunks available" not in selected_chunk_title:
-                    current_id = int(selected_chunk_title.split(':')[0].strip())
-                    new_id = max(0, current_id - 1)
-                    new_chunk = chunk_manager.get_chunk_by_id(new_id)
-                    if new_chunk:
-                        st.session_state.chunk_selector = f"{new_chunk['id']}: {new_chunk['title']}"
-        with nav_col2:
-            if st.button("Next Chunk ➡️", use_container_width=True):
-                 if selected_chunk_title and "No chunks available" not in selected_chunk_title:
-                    current_id = int(selected_chunk_title.split(':')[0].strip())
-                    new_id = min(len(chunk_manager.get_chunks()) - 1, current_id + 1)
-                    new_chunk = chunk_manager.get_chunk_by_id(new_id)
-                    if new_chunk:
-                        st.session_state.chunk_selector = f"{new_chunk['id']}: {new_chunk['title']}"
     # Get the currently selected chunk
     selected_chunk = None
@@ -507,14 +461,14 @@ with tab1:
             value=selected_chunk["title"],
             disabled=True
         )
         chunk_content_editor = st.text_area(
             label="Chunk Content",
             value=selected_chunk["content"],
             height=250,
             key=f"editor_{selected_chunk['id']}" # Unique key to prevent state loss
         )
         st.markdown(
             chunk_manager.format_chunk_stats(selected_chunk['stats']),
             unsafe_allow_html=True
@@ -537,15 +491,13 @@ with tab1:
                 else:
                     st.session_state.chunk_selector = None
     else:
         st.text_input("Chunk Title (Auto-detected)", "Title of the selected chunk", disabled=True)
         st.text_area("Chunk Content", "Content of the selected chunk will appear here for editing.", height=250, disabled=True)
         st.markdown("Chunk statistics will appear here.")
     st.markdown("---")
     st.markdown("## Final Compiled Markdown")
     if st.button("Compile All Chunks", use_container_width=True):
         st.session_state.final_markdown = chunk_manager.get_final_markdown()
@@ -556,11 +508,10 @@ with tab1:
         key="final_markdown_output",
         disabled=False
     )
 with tab2:
     st.markdown("## Document Summary Statistics")
     st.markdown(chunk_manager.get_document_summary_stats(), unsafe_allow_html=True)
     st.markdown("---")
     st.markdown("## Content Targets")
     st.markdown("Adjust these targets to guide your writing and see visual feedback in the chunk selector (green=good, red=needs attention).")
@@ -573,13 +524,13 @@ with tab2:
         with col2:
             target_grade_max_input = st.number_input("Max Flesch-Kincaid Grade", value=float(chunk_manager.target_grade_max))
             target_max_chunk_words_input = st.number_input("Max Chunk Words", value=chunk_manager.target_max_chunk_words)
         submitted = st.form_submit_button("Set New Targets", use_container_width=True)
         if submitted:
             chunk_manager.set_targets(
-                target_flesch_min_input,
-                target_grade_max_input,
-                int(target_min_chunk_words_input),
                 int(target_max_chunk_words_input)
             )
             st.session_state.status_message = "Target settings updated."

             response = requests.get(url, timeout=10) # Add a timeout for robustness
             response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
             html_content = response.text
             soup = BeautifulSoup(html_content, 'html.parser')
             # Aggressive initial removal of script, style, and meta tags that are never content.
             for tag_name in ['script', 'style', 'noscript', 'meta', 'link']:
                 for element in soup.find_all(tag_name):
                                      soup.find('div', {'role': 'main'})
             # Fallback logic if main content container wasn't found
+            if not content_for_conversion:
+                 first_h1 = soup.find('h1')
+                 if first_h1:
                     candidate_container = first_h1.parent
                     found_main_wrapper_via_h1_parent = False
                     # Check up to 5 parent levels for a suitable content wrapper
+                    for _ in range(5):
+                         if candidate_container is None:
                             break
+                         if candidate_container.name in ['article', 'main', 'section', 'div'] and \
                            any(cls in candidate_container.get('class', []) for cls in ['content', 'post-body', 'article-content', 'entry-content', 'main-content']) or \
                            candidate_container.get('role') == 'main':
                             content_for_conversion = candidate_container
                             found_main_wrapper_via_h1_parent = True
                             break
+                         candidate_container = candidate_container.parent
                     # If no clear wrapper found via H1's parent, take H1 and its direct siblings as a fallback
                     if not found_main_wrapper_via_h1_parent:
                         temp_soup = BeautifulSoup('', 'html.parser')
                             temp_soup.append(current_element)
                             current_element = current_element.next_sibling
                         content_for_conversion = temp_soup
+                 else:
                     # Ultimate fallback: use the entire body if no specific content tags or H1 found
                     content_for_conversion = soup.body
                 'map', 'area', 'embed', 'object', 'param', 'applet', 'bgsound', 'frame',
                 'frameset', 'noframes', 'template', 'slot', 'portal', 'datalist', 'keygen',
                 'output', 'progress', 'meter', 'details', 'summary', 'dialog', 'menu',
+                'menuitem', 'command', 'hr', 'figure', 'figcaption', 'cite',
                 '.social-share', '.comments', '.related-posts', '.pagination',
                 '.breadcrumbs', '.pop-up', '.modal', '.overlay', '.cookie-consent',
                 '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
                 '[class*="announcement"]', '[class*="fixed-bottom"]', '[class*="fixed-top"]',
                 '[id*="promo"]', '[id*="ad"]', '[id*="banner"]', '[id*="popup"]', '[id*="modal"]',
                 '[id*="overlay"]', '[id*="cookie"]', '[id*="skip"]', '[id*="navbar"]', '[id*="menu"]',
+                '.hidden', '.visually-hidden',
+                '.no-print', '.print-hide',
+                '.wp-block-navigation', '.wp-block-group.is-style-stripes',
+                '[class*="column"]', '[class*="grid"]'
+             ]
             for selector in unwanted_elements_in_content:
+                if re.match(r'^[a-zA-Z0-9]+$', selector):
+                     for element in content_for_conversion.find_all(selector):
                         element.decompose()
+                else:
+                     for element in content_for_conversion.select(selector):
                         element.decompose()
             markdown_output = convert_to_markdown(str(content_for_conversion))
             # Post-processing: Clean up resulting Markdown
             markdown_output = re.sub(r'\n\s*\n\s*\n+', '\n\n', markdown_output)
             markdown_output = re.sub(r'^\s*[\*\-]\s*$', '', markdown_output, flags=re.MULTILINE)
+            markdown_output = re.sub(r'\*{3,}', '', markdown_output)
             markdown_output = markdown_output.strip()
             return markdown_output
         print(f"✅ Parsed {len(nodes)} nodes from Markdown.") # Debug print
         structured_chunks = []
+        current_id = 0
         for node in nodes:
             pure_text_content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
             heading_title = ""
             content_text = pure_text_content
             heading_match = re.match(r"^(#+)\s*(.*)", pure_text_content)
             if heading_match:
                 heading_title = heading_match.group(2).strip()
                 content_text = pure_text_content[len(heading_match.group(0)):].strip()
+                if not heading_title:
+                     heading_title = "[Untitled Section]"
             else:
                 first_line = content_text.split('\n')[0].strip()
                 heading_title = first_line[:70].strip() + "..." if len(first_line) > 70 else first_line
                 "original_node": node # Keep reference to the original LlamaIndex node
             })
             current_id += 1
         return structured_chunks
 class ChunkManager:
         (Private helper method, SRP for stats calculation)
         """
         stats = {}
+        cleaned_text = re.sub(r'#+\s*', '', text)
+        cleaned_text = re.sub(r'[\*\-]\s*', '', cleaned_text)
+        cleaned_text = re.sub(r'\n\s*\n+', ' ', cleaned_text).strip()
         stats['word_count'] = textstat.lexicon_count(cleaned_text, removepunct=True)
         stats['char_count'] = len(cleaned_text)
         stats['sentence_count'] = textstat.sentence_count(cleaned_text)
         if stats['sentence_count'] > 0:
             stats['avg_sentence_length'] = stats['word_count'] / stats['sentence_count']
         else:
             stats['flesch_reading_ease'] = textstat.flesch_reading_ease(cleaned_text)
         except Exception:
             stats['flesch_reading_ease'] = 0
         try:
             stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(cleaned_text)
         except Exception:
             stats['flesch_kincaid_grade'] = 0
         try:
             stats['gunning_fog_score'] = textstat.gunning_fog(cleaned_text)
         except Exception:
             stats['gunning_fog_score'] = 0
         return stats
     def format_chunk_stats(self, stats: dict) -> str:
         total_chars = 0
         total_sentences = 0
         total_paragraphs = 0
         all_content_text = ""
         for chunk in self._chunks:
             content_text_for_stats = chunk['content']
         doc_stats_str += f"- **Total Characters:** {total_chars}\n"
         doc_stats_str += f"- **Total Sentences:** {total_sentences}\n"
         doc_stats_str += f"- **Total Paragraphs:** {total_paragraphs}\n"
         if len(self._chunks) > 0:
             doc_stats_str += f"- **Average Words per Chunk:** {total_words / len(self._chunks):.2f}\n"
         if all_content_text.strip():
             overall_stats = self._calculate_chunk_stats(all_content_text)
             doc_stats_str += f"- **Overall Flesch Reading Ease:** {overall_stats['flesch_reading_ease']:.2f}\n"
             doc_stats_str += f"- **Overall Average Sentence Length:** {overall_stats['avg_sentence_length']:.2f} words\n"
         else:
             doc_stats_str += "- No content available for overall readability metrics.\n"
         return doc_stats_str
     def get_chunk_by_id(self, chunk_id: int) -> dict | None:
         self._chunks = [chunk for chunk in self._chunks if chunk["id"] != chunk_id]
         if len(self._chunks) == initial_chunk_count:
             return False # Chunk not found
         # Re-index IDs to be sequential again
         for i, chunk in enumerate(self._chunks):
             chunk['id'] = i
         return True
     def get_final_markdown(self) -> str:
         """Join every stored chunk into one Markdown document.

         Returns:
             A fixed placeholder message when there are no chunks. Otherwise
             the chunks' contents in stored order, each followed by a blank
             line and preceded by an H1 line when the chunk's title is
             non-empty and does not start with "[". Leading and trailing
             whitespace of the combined text is stripped.
         """
         # Nothing to compile: hand back the placeholder text right away.
         if not self._chunks:
             return "No content to compile. Please process a URL first."
         pieces = []
         for entry in self._chunks:
             heading = entry["title"]
             # Titles starting with "[" and empty titles get no heading line.
             if not heading.startswith("[") and heading:
                 pieces.append(f"# {heading}\n\n")
             pieces.append(f"{entry['content']}\n\n")
         return "".join(pieces).strip()
     def set_targets(self, flesch_min: float, grade_max: float, min_words: int, max_words: int):
         self.target_max_chunk_words = max_words
         # Recalculate stats for all chunks to reflect new targets in color coding (if displayed)
         for chunk in self._chunks:
+            chunk['stats'] = self._calculate_chunk_stats(chunk['content'])
 # --- Streamlit UI Definition ---
 st.set_page_config(layout="wide", page_title="Chunk-Powered Webpage Editor")
 if 'final_markdown' not in st.session_state:
     st.session_state.final_markdown = "Click 'Compile All Chunks' to see the final document with your edits."
 # Instantiate the managers
 content_processor = st.session_state.content_processor
 chunk_manager = st.session_state.chunk_manager
 st.markdown("# <center>✨ Chunk-Powered Webpage Editor ✨</center>", unsafe_allow_html=True)
+st.markdown("""Enter a URL, fetch its content, and break it into editable 'chunks'. Review statistics, set targets, edit chunks, and compile your final Markdown.<br><div style="font-size: 0.9em; margin-bottom: 12px;">
+    Inspired by <a href="https://www.linkedin.com/pulse/understanding-chunking-google-ai-mode-practical-content-volpini-zseaf/" target="_blank">Andrea Volpini</a></div><div style="display: flex; justify-content: flex-start; align-items: center; gap: 16px;">
     <span>Runs best on Desktop. App created by <a href="https://www.linkedin.com/in/emilijagjorgjevska/" target="_blank">Emilija Gjorgjevska</a></span>
     <a href="https://buymeacoffee.com/emiliagjorgjevska" target="_blank">
         <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 30px;">
+    </a></div><br>""", unsafe_allow_html=True)
 # --- URL Input and Processing ---
 col1, col2 = st.columns([4, 1])
 with col1:
     url_input = st.text_input(
+        label="Enter Webpage URL",
         placeholder="e.g., https://www.llamaindex.ai/blog/what-is-llamaindex",
         key="url_input"
     )
     else:
         with st.spinner("Processing URL..."):
             markdown_content = content_processor.fetch_and_convert_to_markdown(url_input)
             if "Error" in markdown_content:
                 chunk_manager.set_chunks([])
                 st.session_state.status_message = markdown_content
                 chunks = content_processor.parse_markdown_into_chunks(markdown_content)
                 chunk_manager.set_chunks(chunks)
                 st.session_state.status_message = "URL processed successfully!" if chunks else "URL processed, but no content chunks could be extracted."
                 if chunks:
                     st.session_state.chunk_selector = chunk_manager.get_chunk_titles_for_dropdown()[0]
                 else:
                     st.session_state.chunk_selector = None
 # --- Tabs for Editor and Overview ---
 tab1, tab2 = st.tabs(["Editor", "Document Overview & Targets"])
 with tab1:
     st.markdown("## Edit Chunks Individually")
+    chunk_selector_options = chunk_manager.get_chunk_titles_for_dropdown()
+    if chunk_selector_options:
+        try:
+            # Find the index of the currently selected item to handle updates
+            current_selection_index = chunk_selector_options.index(st.session_state.chunk_selector)
+        except (ValueError, TypeError):
+            current_selection_index = 0
+        selected_chunk_title = st.selectbox(
+            label="Select Chunk to Edit",
+            options=chunk_selector_options,
+            index=current_selection_index,
+            key="chunk_selector"
+        )
+    else:
+        selected_chunk_title = st.selectbox(
+            label="Select Chunk to Edit",
+            options=["No chunks available"],
+            disabled=True
+        )
     # Get the currently selected chunk
     selected_chunk = None
             value=selected_chunk["title"],
             disabled=True
         )
         chunk_content_editor = st.text_area(
             label="Chunk Content",
             value=selected_chunk["content"],
             height=250,
             key=f"editor_{selected_chunk['id']}" # Unique key to prevent state loss
         )
         st.markdown(
             chunk_manager.format_chunk_stats(selected_chunk['stats']),
             unsafe_allow_html=True
                 else:
                     st.session_state.chunk_selector = None
     else:
         st.text_input("Chunk Title (Auto-detected)", "Title of the selected chunk", disabled=True)
         st.text_area("Chunk Content", "Content of the selected chunk will appear here for editing.", height=250, disabled=True)
         st.markdown("Chunk statistics will appear here.")
     st.markdown("---")
     st.markdown("## Final Compiled Markdown")
     if st.button("Compile All Chunks", use_container_width=True):
         st.session_state.final_markdown = chunk_manager.get_final_markdown()
         key="final_markdown_output",
         disabled=False
     )
 with tab2:
     st.markdown("## Document Summary Statistics")
     st.markdown(chunk_manager.get_document_summary_stats(), unsafe_allow_html=True)
     st.markdown("---")
     st.markdown("## Content Targets")
     st.markdown("Adjust these targets to guide your writing and see visual feedback in the chunk selector (green=good, red=needs attention).")
         with col2:
             target_grade_max_input = st.number_input("Max Flesch-Kincaid Grade", value=float(chunk_manager.target_grade_max))
             target_max_chunk_words_input = st.number_input("Max Chunk Words", value=chunk_manager.target_max_chunk_words)
         submitted = st.form_submit_button("Set New Targets", use_container_width=True)
         if submitted:
             chunk_manager.set_targets(
+                target_flesch_min_input,
+                target_grade_max_input,
+                int(target_min_chunk_words_input),
                 int(target_max_chunk_words_input)
             )
             st.session_state.status_message = "Target settings updated."