Spaces:

rodolphethinks1
/

ArticleContentBox

Runtime error

App Files Files Community

rodolphethinks1 committed on Jan 24, 2025

Commit

075ec1e

verified ·

1 Parent(s): 8cd8cd6

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -14

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from datetime import datetime
 import pandas as pd
 import re
 def parse_content(html_content):
     # Parse the HTML using BeautifulSoup
     soup = BeautifulSoup(html_content, 'html.parser')
@@ -34,8 +35,7 @@ def parse_content(html_content):
         article_content = "null"
     article_author = article_author.text.strip() if article_author else "null"
-    # Create a DataFrame
-    parsed_data = {
         'Title': article_title,
         'Date': article_date,
         'Author': article_author,
@@ -43,28 +43,48 @@ def parse_content(html_content):
         'Reviews': article_reviews,
         'Content': article_content,
     }
-    df = pd.DataFrame([parsed_data])
-    return df
-def process_input(html_content):
-    try:
-        df = parse_content(html_content)
-        return df
-    except Exception as e:
-        return f"Error: {e}"
 with gr.Blocks() as interface:
     with gr.Column():
         gr.Markdown("## HTML Content Parser")
         html_input = gr.Textbox(
             label="Paste HTML Content",
             placeholder="Paste your HTML content here...",
-            lines=5,  # Restrict initial height
-            max_lines=10  # Maximum height before scrolling
         )
         parse_button = gr.Button("Parse Content")
-        parsed_output = gr.Dataframe(label="Parsed Data")
-    parse_button.click(fn=process_input, inputs=html_input, outputs=parsed_output)
 interface.launch()

 import pandas as pd
 import re
+# Function to parse HTML content and extract details
 def parse_content(html_content):
     # Parse the HTML using BeautifulSoup
     soup = BeautifulSoup(html_content, 'html.parser')
         article_content = "null"
     article_author = article_author.text.strip() if article_author else "null"
+    return {
         'Title': article_title,
         'Date': article_date,
         'Author': article_author,
         'Reviews': article_reviews,
         'Content': article_content,
     }
+# Function to update the dataframe
+def update_dataframe(html_content, dataframe):
+    parsed_data = parse_content(html_content)
+    # Append the new row to the dataframe
+    dataframe = dataframe.append(parsed_data, ignore_index=True)
+    return dataframe
+# Initialize an empty dataframe with 3 empty rows
+initial_df = pd.DataFrame({
+    'Title': [""] * 3,
+    'Date': [""] * 3,
+    'Author': [""] * 3,
+    'Views': [""] * 3,
+    'Reviews': [""] * 3,
+    'Content': [""] * 3
+})
 with gr.Blocks() as interface:
     with gr.Column():
         gr.Markdown("## HTML Content Parser")
+        # Input and Button
         html_input = gr.Textbox(
             label="Paste HTML Content",
             placeholder="Paste your HTML content here...",
+            lines=5,
+            max_lines=10
         )
         parse_button = gr.Button("Parse Content")
+        # Dataframe display
+        parsed_dataframe = gr.Dataframe(value=initial_df, label="Parsed Data", interactive=False)
+        # Hidden state to store the dataframe
+        dataframe_state = gr.State(value=initial_df)
+    # Define the button click event
+    parse_button.click(
+        fn=update_dataframe,
+        inputs=[html_input, dataframe_state],
+        outputs=[parsed_dataframe, dataframe_state]
+    )
 interface.launch()