hari3485 committed on
Commit
334b087
·
verified ·
1 Parent(s): ebd1124

Update pages/hari.py

Browse files
Files changed (1) hide show
  1. pages/hari.py +40 -38
pages/hari.py CHANGED
@@ -230,75 +230,77 @@ def html_details_page():
230
  **HTML** (HyperText Markup Language) is used to structure web pages.
231
 
232
  - Semi-structured data with nested tags.
233
- - Libraries like `BeautifulSoup` help parse and extract information.
234
  """)
 
 
 
 
 
 
 
 
235
  st.write("""
236
- - **HTML (HyperText Markup Language)** is a semi-structured data format.
237
- - HTML uses tags like `<table>`, `<tr>`, `<th>`, and `<td>` to structure tabular data.
238
- - Unlike XML, HTML does not allow creating custom tags freely.
239
- - Not all HTML content can be converted into dataframes, especially paragraph text or unstructured data.
240
- - Typically, only table-related elements (`<table>`, `<tr>`, `<th>`, `<td>`) can be converted into dataframes.
241
  """)
242
-
243
- # Reading HTML Files Section
244
- st.header("Reading HTML Files into DataFrames")
245
-
246
- st.write("**Reading HTML Files:**")
247
  st.code("""
248
  import pandas as pd
249
- tables = pd.read_html(path_or_buffer)
250
  """, language="python")
251
-
252
  st.write("""
253
- - **`pd.read_html(path_or_buffer)`** reads HTML files or websites containing tables.
254
- - Extracts all tables and returns them as a list of dataframes.
255
  """)
256
-
257
- st.write("**Accessing Specific Tables:**")
258
  st.code("""
259
- # Accessing the first table from the list
260
  table = tables[0]
261
  """, language="python")
262
-
263
  st.write("""
264
- - Each table is stored in the list by index.
265
- - Use indexing to select the table you want to work with.
266
  """)
267
 
268
  st.write("**Limitations:**")
269
  st.write("""
270
- - Not all websites or HTML files can be read, even if they have tables.
271
- - Issues like authorization restrictions can prevent reading certain tables.
272
  """)
273
-
274
- st.write("**Using the `match` Parameter:**")
275
  st.code("""
276
- # Reading a specific table using the match parameter
277
- tables = pd.read_html(path, match="keyword")
278
  """, language="python")
279
-
280
  st.write("""
281
- - To locate specific tables, use `match="keyword"` while reading HTML.
282
- - The `match` parameter searches for tables containing the specified keyword.
283
  """)
284
 
285
- # Exporting DataFrames Section
286
  st.header("Exporting DataFrames to HTML")
287
 
288
- st.write("**Exporting DataFrame to HTML:**")
289
  st.code("""
290
- # Exporting a dataframe to an HTML file
291
  df.to_html("output.html")
292
  """, language="python")
293
 
294
  st.write("""
295
- - Converts a dataframe into an HTML file.
296
- - Saves the dataframe in an HTML-compatible table format at the specified path.
297
  """)
298
- st.code('from bs4 import BeautifulSoup\nsoup = BeautifulSoup(open("file.html"))', language="python")
299
 
300
- if st.button("Back to Home"):
301
- st.session_state['page'] = "home"
302
 
303
  # Unstructured Data - Image Page
304
  def image_details_page():
 
230
  **HTML** (HyperText Markup Language) is used to structure web pages.
231
 
232
  - Semi-structured data with nested tags.
 
233
  """)
234
+
235
+
236
+ # App title
237
+ st.title("Working with HTML Data in Python")
238
+
239
+ # Section: HTML and DataFrames
240
+ st.header("HTML and DataFrames")
241
+
242
  st.write("""
243
+ - **HTML** stands for HyperText Markup Language and is a semi-structured format.
244
+ - HTML uses tags like `<table>`, `<tr>`, `<th>`, and `<td>` to show table data.
245
+ - Unlike XML, HTML doesn’t let you create any custom tags.
246
+ - Not all HTML can be changed into dataframes, especially plain text like paragraphs.
247
+ - Usually, only table-related tags (`<table>`, `<tr>`, `<th>`, `<td>`) can be converted into dataframes.
248
  """)
249
+
250
+ # Section: Reading HTML Files
251
+
252
+ st.write("**How to Read HTML Files:**")
 
253
  st.code("""
254
  import pandas as pd
255
+ tables = pd.read_html("path_or_url")
256
  """, language="python")
257
+
258
  st.write("""
259
+ - Use `pd.read_html()` to read tables from an HTML file or a website.
260
+ - This function collects all tables and gives them as a list of dataframes.
261
  """)
262
+
263
+ st.write("**How to Get Specific Tables:**")
264
  st.code("""
265
+ # Select the first table from the list
266
  table = tables[0]
267
  """, language="python")
268
+
269
  st.write("""
270
+ - The tables are stored as a list, and you can access them using their index number.
 
271
  """)
272
 
273
  st.write("**Limitations:**")
274
  st.write("""
275
+ - Some HTML files or websites cannot be read, even if they have tables.
276
+ - Issues like file permissions or restrictions may stop reading.
277
  """)
278
+
279
+ st.write("**Using `match` to Find Specific Tables:**")
280
  st.code("""
281
+ # Read a specific table by searching for a keyword
282
+ tables = pd.read_html("path_or_url", match="keyword")
283
  """, language="python")
284
+
285
  st.write("""
286
+ - The `match` parameter lets you find tables with specific keywords.
287
+ - This is useful to pick the right table when many are present.
288
  """)
289
 
290
+ # Section: Exporting DataFrames
291
  st.header("Exporting DataFrames to HTML")
292
 
293
+ st.write("**How to Export a DataFrame to HTML:**")
294
  st.code("""
295
+ # Save a dataframe as an HTML file
296
  df.to_html("output.html")
297
  """, language="python")
298
 
299
  st.write("""
300
+ - This converts your dataframe into an HTML file.
301
+ - You can save the HTML file at a specified location.
302
  """)
 
303
 
 
 
304
 
305
  # Unstructured Data - Image Page
306
  def image_details_page():