Spaces: Sleeping
Update pages/hari.py
Browse files
- pages/hari.py +40 -38
pages/hari.py
CHANGED
|
@@ -230,75 +230,77 @@ def html_details_page():
|
|
| 230 |
**HTML** (HyperText Markup Language) is used to structure web pages.
|
| 231 |
|
| 232 |
- Semi-structured data with nested tags.
|
| 233 |
-
- Libraries like `BeautifulSoup` help parse and extract information.
|
| 234 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
st.write("""
|
| 236 |
-
- **HTML
|
| 237 |
-
- HTML uses tags like `<table>`, `<tr>`, `<th>`, and `<td>` to
|
| 238 |
-
- Unlike XML, HTML
|
| 239 |
-
- Not all HTML
|
| 240 |
-
-
|
| 241 |
""")
|
| 242 |
-
|
| 243 |
-
# Reading HTML Files
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
st.write("**Reading HTML Files:**")
|
| 247 |
st.code("""
|
| 248 |
import pandas as pd
|
| 249 |
-
tables = pd.read_html(
|
| 250 |
""", language="python")
|
| 251 |
-
|
| 252 |
st.write("""
|
| 253 |
-
-
|
| 254 |
-
-
|
| 255 |
""")
|
| 256 |
-
|
| 257 |
-
st.write("**
|
| 258 |
st.code("""
|
| 259 |
-
#
|
| 260 |
table = tables[0]
|
| 261 |
""", language="python")
|
| 262 |
-
|
| 263 |
st.write("""
|
| 264 |
-
-
|
| 265 |
-
- Use indexing to select the table you want to work with.
|
| 266 |
""")
|
| 267 |
|
| 268 |
st.write("**Limitations:**")
|
| 269 |
st.write("""
|
| 270 |
-
-
|
| 271 |
-
- Issues like
|
| 272 |
""")
|
| 273 |
-
|
| 274 |
-
st.write("**Using
|
| 275 |
st.code("""
|
| 276 |
-
#
|
| 277 |
-
tables = pd.read_html(
|
| 278 |
""", language="python")
|
| 279 |
-
|
| 280 |
st.write("""
|
| 281 |
-
-
|
| 282 |
-
-
|
| 283 |
""")
|
| 284 |
|
| 285 |
-
# Exporting DataFrames
|
| 286 |
st.header("Exporting DataFrames to HTML")
|
| 287 |
|
| 288 |
-
st.write("**
|
| 289 |
st.code("""
|
| 290 |
-
#
|
| 291 |
df.to_html("output.html")
|
| 292 |
""", language="python")
|
| 293 |
|
| 294 |
st.write("""
|
| 295 |
-
-
|
| 296 |
-
-
|
| 297 |
""")
|
| 298 |
-
st.code('from bs4 import BeautifulSoup\nsoup = BeautifulSoup(open("file.html"))', language="python")
|
| 299 |
|
| 300 |
-
if st.button("Back to Home"):
|
| 301 |
-
st.session_state['page'] = "home"
|
| 302 |
|
| 303 |
# Unstructured Data - Image Page
|
| 304 |
def image_details_page():
|
|
|
|
| 230 |
**HTML** (HyperText Markup Language) is used to structure web pages.
|
| 231 |
|
| 232 |
- Semi-structured data with nested tags.
|
|
|
|
| 233 |
""")
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# App title
|
| 237 |
+
st.title("Working with HTML Data in Python")
|
| 238 |
+
|
| 239 |
+
# Section: HTML and DataFrames
|
| 240 |
+
st.header("HTML and DataFrames")
|
| 241 |
+
|
| 242 |
st.write("""
|
| 243 |
+
- **HTML** stands for HyperText Markup Language and is a semi-structured format.
|
| 244 |
+
- HTML uses tags like `<table>`, `<tr>`, `<th>`, and `<td>` to show table data.
|
| 245 |
+
- Unlike XML, HTML doesn’t let you create any custom tags.
|
| 246 |
+
- Not all HTML can be changed into dataframes, especially plain text like paragraphs.
|
| 247 |
+
- Usually, only table-related tags (`<table>`, `<tr>`, `<th>`, `<td>`) can be converted into dataframes.
|
| 248 |
""")
|
| 249 |
+
|
| 250 |
+
# Section: Reading HTML Files
|
| 251 |
+
|
| 252 |
+
st.write("**How to Read HTML Files:**")
|
|
|
|
| 253 |
st.code("""
|
| 254 |
import pandas as pd
|
| 255 |
+
tables = pd.read_html("path_or_url")
|
| 256 |
""", language="python")
|
| 257 |
+
|
| 258 |
st.write("""
|
| 259 |
+
- Use `pd.read_html()` to read tables from an HTML file or a website.
|
| 260 |
+
- This function collects all tables and gives them as a list of dataframes.
|
| 261 |
""")
|
| 262 |
+
|
| 263 |
+
st.write("**How to Get Specific Tables:**")
|
| 264 |
st.code("""
|
| 265 |
+
# Select the first table from the list
|
| 266 |
table = tables[0]
|
| 267 |
""", language="python")
|
| 268 |
+
|
| 269 |
st.write("""
|
| 270 |
+
- The tables are stored as a list, and you can access them using their index number.
|
|
|
|
| 271 |
""")
|
| 272 |
|
| 273 |
st.write("**Limitations:**")
|
| 274 |
st.write("""
|
| 275 |
+
- Some HTML files or websites cannot be read, even if they have tables.
|
| 276 |
+
- Issues like file permissions or restrictions may stop reading.
|
| 277 |
""")
|
| 278 |
+
|
| 279 |
+
st.write("**Using `match` to Find Specific Tables:**")
|
| 280 |
st.code("""
|
| 281 |
+
# Read a specific table by searching for a keyword
|
| 282 |
+
tables = pd.read_html("path_or_url", match="keyword")
|
| 283 |
""", language="python")
|
| 284 |
+
|
| 285 |
st.write("""
|
| 286 |
+
- The `match` parameter lets you find tables with specific keywords.
|
| 287 |
+
- This is useful to pick the right table when many are present.
|
| 288 |
""")
|
| 289 |
|
| 290 |
+
# Section: Exporting DataFrames
|
| 291 |
st.header("Exporting DataFrames to HTML")
|
| 292 |
|
| 293 |
+
st.write("**How to Export a DataFrame to HTML:**")
|
| 294 |
st.code("""
|
| 295 |
+
# Save a dataframe as an HTML file
|
| 296 |
df.to_html("output.html")
|
| 297 |
""", language="python")
|
| 298 |
|
| 299 |
st.write("""
|
| 300 |
+
- This converts your dataframe into an HTML file.
|
| 301 |
+
- You can save the HTML file at a specified location.
|
| 302 |
""")
|
|
|
|
| 303 |
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# Unstructured Data - Image Page
|
| 306 |
def image_details_page():
|