rodolphethinks1 committed on
Commit
075ec1e
·
verified ·
1 Parent(s): 8cd8cd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -14
app.py CHANGED
@@ -4,6 +4,7 @@ from datetime import datetime
4
  import pandas as pd
5
  import re
6
 
 
7
  def parse_content(html_content):
8
  # Parse the HTML using BeautifulSoup
9
  soup = BeautifulSoup(html_content, 'html.parser')
@@ -34,8 +35,7 @@ def parse_content(html_content):
34
  article_content = "null"
35
  article_author = article_author.text.strip() if article_author else "null"
36
 
37
- # Create a DataFrame
38
- parsed_data = {
39
  'Title': article_title,
40
  'Date': article_date,
41
  'Author': article_author,
@@ -43,28 +43,48 @@ def parse_content(html_content):
43
  'Reviews': article_reviews,
44
  'Content': article_content,
45
  }
46
- df = pd.DataFrame([parsed_data])
47
- return df
48
 
49
- def process_input(html_content):
50
- try:
51
- df = parse_content(html_content)
52
- return df
53
- except Exception as e:
54
- return f"Error: {e}"
 
 
 
 
 
 
 
 
 
 
55
 
56
  with gr.Blocks() as interface:
57
  with gr.Column():
58
  gr.Markdown("## HTML Content Parser")
 
 
59
  html_input = gr.Textbox(
60
  label="Paste HTML Content",
61
  placeholder="Paste your HTML content here...",
62
- lines=5, # Restrict initial height
63
- max_lines=10 # Maximum height before scrolling
64
  )
65
  parse_button = gr.Button("Parse Content")
66
- parsed_output = gr.Dataframe(label="Parsed Data")
 
 
 
 
 
67
 
68
- parse_button.click(fn=process_input, inputs=html_input, outputs=parsed_output)
 
 
 
 
 
69
 
70
  interface.launch()
 
4
  import pandas as pd
5
  import re
6
 
7
+ # Function to parse HTML content and extract details
8
  def parse_content(html_content):
9
  # Parse the HTML using BeautifulSoup
10
  soup = BeautifulSoup(html_content, 'html.parser')
 
35
  article_content = "null"
36
  article_author = article_author.text.strip() if article_author else "null"
37
 
38
+ return {
 
39
  'Title': article_title,
40
  'Date': article_date,
41
  'Author': article_author,
 
43
  'Reviews': article_reviews,
44
  'Content': article_content,
45
  }
 
 
46
 
47
# Function to update the dataframe
def update_dataframe(html_content, dataframe):
    """Parse ``html_content`` and append the result as a new row.

    Args:
        html_content: Raw HTML text to hand to ``parse_content``.
        dataframe: The DataFrame accumulated so far (the value held in the
            Gradio state component).

    Returns:
        A ``(dataframe, dataframe)`` tuple holding the updated DataFrame
        twice: the click handler is wired to two outputs (the visible
        table and the hidden state), and each output needs its own
        return value.
    """
    parsed_data = parse_content(html_content)
    # DataFrame.append was removed in pandas 2.0; concatenating a one-row
    # frame is the supported way to add the parsed record.
    new_row = pd.DataFrame([parsed_data])
    dataframe = pd.concat([dataframe, new_row], ignore_index=True)
    # One value per output component: displayed table first, then state.
    return dataframe, dataframe
53
+
54
+ # Initialize an empty dataframe with 3 empty rows
55
+ initial_df = pd.DataFrame({
56
+ 'Title': [""] * 3,
57
+ 'Date': [""] * 3,
58
+ 'Author': [""] * 3,
59
+ 'Views': [""] * 3,
60
+ 'Reviews': [""] * 3,
61
+ 'Content': [""] * 3
62
+ })
63
 
64
  with gr.Blocks() as interface:
65
  with gr.Column():
66
  gr.Markdown("## HTML Content Parser")
67
+
68
+ # Input and Button
69
  html_input = gr.Textbox(
70
  label="Paste HTML Content",
71
  placeholder="Paste your HTML content here...",
72
+ lines=5,
73
+ max_lines=10
74
  )
75
  parse_button = gr.Button("Parse Content")
76
+
77
+ # Dataframe display
78
+ parsed_dataframe = gr.Dataframe(value=initial_df, label="Parsed Data", interactive=False)
79
+
80
+ # Hidden state to store the dataframe
81
+ dataframe_state = gr.State(value=initial_df)
82
 
83
+ # Define the button click event
84
+ parse_button.click(
85
+ fn=update_dataframe,
86
+ inputs=[html_input, dataframe_state],
87
+ outputs=[parsed_dataframe, dataframe_state]
88
+ )
89
 
90
  interface.launch()