Em4e committed on
Commit
3217d2c
·
verified ·
1 Parent(s): fedcfde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -50
app.py CHANGED
@@ -6,9 +6,6 @@ from llama_index.core.node_parser import MarkdownNodeParser
6
  from llama_index.core.schema import Document, MetadataMode
7
  import textstat
8
  from markdownify import markdownify as md
9
- # NEW: Imports for the WYSIWYG editor and Markdown-HTML conversion
10
- from streamlit_quill import st_quill
11
- import markdown
12
 
13
  # --- Core Logic Classes ---
14
  class WebpageContentProcessor:
@@ -110,11 +107,9 @@ class ChunkManager:
110
  """Calculates readability and other metrics for a text chunk."""
111
  stats = {}
112
  try:
113
- # textstat requires plain text, so we ensure any residual HTML is stripped
114
- clean_text = re.sub('<[^<]+?>', '', text)
115
- stats['word_count'] = textstat.lexicon_count(clean_text, removepunct=True)
116
- stats['flesch_reading_ease'] = textstat.flesch_reading_ease(clean_text)
117
- stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(clean_text)
118
  except (Exception, TypeError):
119
  stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
120
  return stats
@@ -153,7 +148,6 @@ class ChunkManager:
153
  def update_chunk_content(self, chunk_id: int, new_content: str):
154
  chunk = self.get_chunk_by_id(chunk_id)
155
  if chunk:
156
- # The content is now Markdown, converted from the editor's HTML
157
  chunk["content"] = new_content
158
  self._add_stats_to_chunk(chunk)
159
  if chunk["title"].startswith("["):
@@ -234,13 +228,13 @@ url_input = st.text_input("Enter a webpage URL to start", key="url_input")
234
  if st.button("Process URL", use_container_width=True, type="primary"):
235
  if url_input:
236
  with st.spinner("Fetching and chunking content..."):
237
- markdown_content = processor.fetch_and_convert_to_markdown(url_input)
238
- if "Error" in markdown_content:
239
- st.session_state.status_message = markdown_content
240
  manager.set_chunks([])
241
  st.session_state.selected_chunk_id = None
242
  else:
243
- chunks = processor.parse_markdown_into_chunks(markdown_content)
244
  manager.set_chunks(chunks)
245
  if chunks:
246
  st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
@@ -282,43 +276,39 @@ with tab1:
282
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
283
 
284
  if selected_chunk:
285
- st.markdown(f"**Editing: {selected_chunk['title']}**")
286
- st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
287
-
288
- # --- NEW: Replaced editor with WYSIWYG component ---
289
- # 1. Convert our stored Markdown to HTML for the editor
290
- content_as_html = markdown.markdown(selected_chunk['content'])
291
-
292
- # 2. Instantiate the WYSIWYG editor
293
- edited_html = st_quill(
294
- value=content_as_html,
295
- key=f"editor_{selected_chunk['id']}",
296
- html=True, # Work in HTML mode
297
- toolbar=[ # Customize the toolbar
298
- [{'header': [1, 2, 3, 4, 5, 6, False]}],
299
- ['bold', 'italic', 'underline', 'strike'],
300
- [{'list': 'ordered'}, {'list': 'bullet'}],
301
- ['link', 'image'],
302
- ['clean']
303
- ]
304
- )
305
-
306
- # 3. Action buttons
307
- b_col1, b_col2, _ = st.columns([1, 1, 5])
308
-
309
- if b_col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
310
- # Convert the editor's HTML output back to Markdown before saving
311
- updated_markdown = md(edited_html, heading_style="ATX")
312
- manager.update_chunk_content(selected_chunk['id'], updated_markdown)
313
- st.session_state.status_message = "Chunk updated successfully!"
314
- st.rerun()
315
-
316
- if b_col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
317
- manager.delete_chunk(selected_chunk['id'])
318
- st.session_state.status_message = "Chunk deleted."
319
- remaining_chunks = manager.get_chunks()
320
- st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
321
- st.rerun()
322
 
323
  with tab2:
324
  st.subheader("Document Overview")
 
6
  from llama_index.core.schema import Document, MetadataMode
7
  import textstat
8
  from markdownify import markdownify as md
 
 
 
9
 
10
  # --- Core Logic Classes ---
11
  class WebpageContentProcessor:
 
107
  """Calculates readability and other metrics for a text chunk."""
108
  stats = {}
109
  try:
110
+ stats['word_count'] = textstat.lexicon_count(text, removepunct=True)
111
+ stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
112
+ stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
 
 
113
  except (Exception, TypeError):
114
  stats.update({'word_count': 0, 'flesch_reading_ease': 0, 'flesch_kincaid_grade': 0})
115
  return stats
 
148
  def update_chunk_content(self, chunk_id: int, new_content: str):
149
  chunk = self.get_chunk_by_id(chunk_id)
150
  if chunk:
 
151
  chunk["content"] = new_content
152
  self._add_stats_to_chunk(chunk)
153
  if chunk["title"].startswith("["):
 
228
  if st.button("Process URL", use_container_width=True, type="primary"):
229
  if url_input:
230
  with st.spinner("Fetching and chunking content..."):
231
+ markdown = processor.fetch_and_convert_to_markdown(url_input)
232
+ if "Error" in markdown:
233
+ st.session_state.status_message = markdown
234
  manager.set_chunks([])
235
  st.session_state.selected_chunk_id = None
236
  else:
237
+ chunks = processor.parse_markdown_into_chunks(markdown)
238
  manager.set_chunks(chunks)
239
  if chunks:
240
  st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
 
276
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
277
 
278
  if selected_chunk:
279
+ # --- RESTORED: Side-by-side layout for editor and live preview ---
280
+ editor_col, preview_col = st.columns(2)
281
+
282
+ with editor_col:
283
+ st.markdown(f"**Editing: {selected_chunk['title']}**")
284
+ st.markdown(manager.format_chunk_stats(selected_chunk['stats']), unsafe_allow_html=True)
285
+
286
+ edited_content = st.text_area(
287
+ "Chunk Content (Markdown)",
288
+ value=selected_chunk['content'],
289
+ height=400,
290
+ key=f"editor_{selected_chunk['id']}"
291
+ )
292
+
293
+ b_col1, b_col2, _ = st.columns([1, 1, 3])
294
+
295
+ if b_col1.button("Update Chunk", use_container_width=True, key=f"update_{selected_chunk['id']}"):
296
+ manager.update_chunk_content(selected_chunk['id'], edited_content)
297
+ st.session_state.status_message = "Chunk updated successfully!"
298
+ st.rerun()
299
+
300
+ if b_col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
301
+ manager.delete_chunk(selected_chunk['id'])
302
+ st.session_state.status_message = "Chunk deleted."
303
+ remaining_chunks = manager.get_chunks()
304
+ st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
305
+ st.rerun()
306
+
307
+ with preview_col:
308
+ st.markdown("**Live Preview**")
309
+ with st.container(height=525, border=True):
310
+ st.markdown(edited_content, unsafe_allow_html=True)
311
+
 
 
 
 
312
 
313
  with tab2:
314
  st.subheader("Document Overview")