Em4e committed on
Commit
436647a
·
verified ·
1 Parent(s): 47b34c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -44
app.py CHANGED
@@ -6,6 +6,9 @@ from llama_index.core.node_parser import MarkdownNodeParser
6
  from llama_index.core.schema import Document, MetadataMode
7
  import textstat
8
  from markdownify import markdownify as md
 
 
 
9
 
10
  # --- Core Logic Classes ---
11
  class WebpageContentProcessor:
@@ -107,9 +110,11 @@ class ChunkManager:
107
  """Calculates readability and other metrics for a text chunk."""
108
  stats = {}
109
  try:
110
- stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
111
- stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
112
- stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
 
 
113
  except (Exception, TypeError):
114
  stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
115
  return stats
@@ -148,6 +153,7 @@ class ChunkManager:
148
  def update_chunk_content(self, chunk_id: int, new_content: str):
149
  chunk = self.get_chunk_by_id(chunk_id)
150
  if chunk:
 
151
  chunk["content"] = new_content
152
  self._add_stats_to_chunk(chunk)
153
  if chunk["title"].startswith("["):
@@ -192,7 +198,6 @@ def init_session_state():
192
  st.session_state.selected_chunk_id = None
193
  if 'status_message' not in st.session_state:
194
  st.session_state.status_message = ""
195
- # REMOVED: The show_preview state is no longer needed with the side-by-side layout
196
 
197
  init_session_state()
198
 
@@ -229,13 +234,13 @@ url_input = st.text_input("Enter a webpage URL to start", key="url_input")
229
  if st.button("Process URL", use_container_width=True, type="primary"):
230
  if url_input:
231
  with st.spinner("Fetching and chunking content..."):
232
- markdown = processor.fetch_and_convert_to_markdown(url_input)
233
- if "Error" in markdown:
234
- st.session_state.status_message = markdown
235
  manager.set_chunks([])
236
  st.session_state.selected_chunk_id = None
237
  else:
238
- chunks = processor.parse_markdown_into_chunks(markdown)
239
  manager.set_chunks(chunks)
240
  if chunks:
241
  st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
@@ -277,41 +282,43 @@ with tab1:
277
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
278
 
279
  if selected_chunk:
280
- # --- NEW: Side-by-side layout for editor and live preview ---
281
- editor_col, preview_col = st.columns(2)
282
-
283
- with editor_col:
284
- st.markdown(f"**Editing: {selected_chunk['title']}**")
285
- st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
286
-
287
- edited_content = st.text_area(
288
- "Chunk Content (Markdown)",
289
- value=selected_chunk['content'],
290
- height=400, # Increased height for better visibility
291
- key=f"editor_{selected_chunk['id']}"
292
- )
293
-
294
- # Action buttons below the editor
295
- b_col1, b_col2, _ = st.columns([1, 1, 3])
296
-
297
- if b_col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
298
- manager.update_chunk_content(selected_chunk['id'], edited_content)
299
- st.session_state.status_message = "Chunk updated successfully!"
300
- st.rerun()
301
-
302
- if b_col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
303
- manager.delete_chunk(selected_chunk['id'])
304
- st.session_state.status_message = "Chunk deleted."
305
- remaining_chunks = manager.get_chunks()
306
- st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
307
- st.rerun()
308
-
309
- with preview_col:
310
- st.markdown("**Live Preview**")
311
- # The container now continuously renders the content from the editor
312
- with st.container(height=525, border=True):
313
- st.markdown(edited_content, unsafe_allow_html=True)
314
-
 
 
315
 
316
  with tab2:
317
  st.subheader("Document Overview")
@@ -332,4 +339,4 @@ with tab2:
332
  st.rerun()
333
 
334
  st.subheader("Final Compiled Document")
335
- st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")
 
6
  from llama_index.core.schema import Document, MetadataMode
7
  import textstat
8
  from markdownify import markdownify as md
9
+ # NEW: Imports for the WYSIWYG editor and Markdown-HTML conversion
10
+ from streamlit_quill import st_quill
11
+ import markdown
12
 
13
  # --- Core Logic Classes ---
14
  class WebpageContentProcessor:
 
110
  """Calculates readability and other metrics for a text chunk."""
111
  stats = {}
112
  try:
113
+ # textstat requires plain text, so we ensure any residual HTML is stripped
114
+ clean_text = re.sub('<[^<]+?>', '', text)
115
+ stats['word_count'] = textstat.lexicon_count(clean_text, removepunct=True)
116
+ stats['flesch_reading_ease'] = textstat.flesch_reading_ease(clean_text)
117
+ stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(clean_text)
118
  except (Exception, TypeError):
119
  stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
120
  return stats
 
153
  def update_chunk_content(self, chunk_id: int, new_content: str):
154
  chunk = self.get_chunk_by_id(chunk_id)
155
  if chunk:
156
+ # The content is now Markdown, converted from the editor's HTML
157
  chunk["content"] = new_content
158
  self._add_stats_to_chunk(chunk)
159
  if chunk["title"].startswith("["):
 
198
  st.session_state.selected_chunk_id = None
199
  if 'status_message' not in st.session_state:
200
  st.session_state.status_message = ""
 
201
 
202
  init_session_state()
203
 
 
234
  if st.button("Process URL", use_container_width=True, type="primary"):
235
  if url_input:
236
  with st.spinner("Fetching and chunking content..."):
237
+ markdown_content = processor.fetch_and_convert_to_markdown(url_input)
238
+ if "Error" in markdown_content:
239
+ st.session_state.status_message = markdown_content
240
  manager.set_chunks([])
241
  st.session_state.selected_chunk_id = None
242
  else:
243
+ chunks = processor.parse_markdown_into_chunks(markdown_content)
244
  manager.set_chunks(chunks)
245
  if chunks:
246
  st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
 
282
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
283
 
284
  if selected_chunk:
285
+ st.markdown(f"**Editing: {selected_chunk['title']}**")
286
+ st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
287
+
288
+ # --- NEW: Replaced editor with WYSIWYG component ---
289
+ # 1. Convert our stored Markdown to HTML for the editor
290
+ content_as_html = markdown.markdown(selected_chunk['content'])
291
+
292
+ # 2. Instantiate the WYSIWYG editor
293
+ edited_html = st_quill(
294
+ value=content_as_html,
295
+ key=f"editor_{selected_chunk['id']}",
296
+ html=True, # Work in HTML mode
297
+ toolbar=[ # Customize the toolbar
298
+ [{'header': [1, 2, 3, 4, 5, 6, False]}],
299
+ ['bold', 'italic', 'underline', 'strike'],
300
+ [{'list': 'ordered'}, {'list': 'bullet'}],
301
+ ['link', 'image'],
302
+ ['clean']
303
+ ]
304
+ )
305
+
306
+ # 3. Action buttons
307
+ b_col1, b_col2, _ = st.columns([1, 1, 5])
308
+
309
+ if b_col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
310
+ # Convert the editor's HTML output back to Markdown before saving
311
+ updated_markdown = md(edited_html, heading_style="ATX")
312
+ manager.update_chunk_content(selected_chunk['id'], updated_markdown)
313
+ st.session_state.status_message = "Chunk updated successfully!"
314
+ st.rerun()
315
+
316
+ if b_col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
317
+ manager.delete_chunk(selected_chunk['id'])
318
+ st.session_state.status_message = "Chunk deleted."
319
+ remaining_chunks = manager.get_chunks()
320
+ st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
321
+ st.rerun()
322
 
323
  with tab2:
324
  st.subheader("Document Overview")
 
339
  st.rerun()
340
 
341
  st.subheader("Final Compiled Document")
342
+ st.text_area("Final Markdown Output", manager.get_final_markdown(), height=500, key="final_markdown")