| """ |
| from bs4 import BeautifulSoup |
| |
| |
| with open("mini_dataset/6.html") as f: |
| test = f.read() |
| |
| soup = BeautifulSoup(test, "html.parser") |
| """ |
|
|
|
|
| |
def has_title(soup):
    """Report whether the document has a non-empty title.

    Args:
        soup: Parsed document exposing a ``title`` attribute (presumably a
            BeautifulSoup object -- confirm against callers).

    Returns:
        int: 1 if ``soup.title`` exists and its text has at least one
        character, otherwise 0.
    """
    title_tag = soup.title
    if title_tag is not None and len(title_tag.text) > 0:
        return 1
    return 0
|
|
|
|
| |
def has_input(soup):
    """Report whether the document contains any ``<input>`` tag.

    Args:
        soup: Parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against callers).

    Returns:
        int: 1 if ``soup.find_all("input")`` is non-empty, otherwise 0.
    """
    return 1 if soup.find_all("input") else 0
|
|
|
|
| |
def has_button(soup):
    """Report whether the document contains any ``<button>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``button`` tag is found, otherwise 0.
    """
    button_tags = soup.find_all("button")
    return int(bool(button_tags))
|
|
|
|
| |
def has_image(soup):
    """Report whether the document contains any ``<image>`` tag.

    NOTE(review): this looks up the tag name ``image``, not ``img``;
    ``number_of_img`` in this file counts ``img`` tags. Confirm that
    ``image`` is the intended tag before relying on this flag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``image`` tag is found, otherwise 0.
    """
    image_tags = soup.find_all("image")
    if image_tags:
        return 1
    return 0
|
|
|
|
| |
def has_submit(soup):
    """Report whether any ``<input>`` tag has ``type="submit"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: 1 if some ``input`` tag's ``type`` equals ``"submit"``,
        otherwise 0.
    """
    found = any(
        field.get("type") == "submit" for field in soup.find_all("input")
    )
    return int(found)
|
|
|
|
| |
def has_link(soup):
    """Report whether the document contains any ``<link>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``link`` tag is found, otherwise 0.
    """
    return int(len(soup.find_all("link")) > 0)
|
|
|
|
| |
def has_password(soup):
    """Report whether any ``<input>`` tag looks like a password field.

    An input matches when its ``type``, ``name`` or ``id`` attribute is
    exactly ``"password"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: 1 if a matching ``input`` tag exists, otherwise 0.
    """
    for field in soup.find_all("input"):
        # Test each attribute on its own: chaining the lookups with ``or``
        # would only ever compare the first non-empty attribute, so
        # ``type="text" name="password"`` would be missed.
        candidates = (field.get("type"), field.get("name"), field.get("id"))
        if "password" in candidates:
            return 1
    return 0
|
|
|
|
| |
def has_email_input(soup):
    """Report whether any ``<input>`` tag looks like an e-mail field.

    An input matches when its ``type``, ``id`` or ``name`` attribute is
    exactly ``"email"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: 1 if a matching ``input`` tag exists, otherwise 0.
    """
    for field in soup.find_all("input"):
        # Test each attribute on its own: chaining the lookups with ``or``
        # would only ever compare the first non-empty attribute, so
        # ``type="text" name="email"`` would be missed.
        candidates = (field.get("type"), field.get("id"), field.get("name"))
        if "email" in candidates:
            return 1
    return 0
|
|
|
|
| |
def has_hidden_element(soup):
    """Report whether any ``<input>`` tag has ``type="hidden"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: 1 if some ``input`` tag's ``type`` equals ``"hidden"``,
        otherwise 0.
    """
    hidden_found = any(
        field.get("type") == "hidden" for field in soup.find_all("input")
    )
    return 1 if hidden_found else 0
|
|
|
|
| |
def has_audio(soup):
    """Report whether the document contains any ``<audio>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``audio`` tag is found, otherwise 0.
    """
    audio_tags = soup.find_all("audio")
    if not audio_tags:
        return 0
    return 1
|
|
|
|
| |
def has_video(soup):
    """Report whether the document contains any ``<video>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``video`` tag is found, otherwise 0.
    """
    return 1 if soup.find_all("video") else 0
|
|
|
|
| |
def number_of_inputs(soup):
    """Count the ``<input>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("input")``.
    """
    input_tags = soup.find_all("input")
    return len(input_tags)
|
|
|
|
| |
def number_of_buttons(soup):
    """Count the ``<button>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("button")``.
    """
    button_tags = soup.find_all("button")
    return len(button_tags)
|
|
|
|
| |
def number_of_images(soup):
    """Count ``<image>`` tags plus ``<meta>`` tags that are marked as images.

    A ``meta`` tag is counted when its ``type`` or its ``name`` attribute
    is exactly ``"image"``.

    NOTE(review): the tag name searched is ``image``, not ``img``;
    ``number_of_img`` in this file counts ``img`` tags.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: Number of ``image`` tags plus number of matching ``meta`` tags.
    """
    image_tag_count = len(soup.find_all("image"))
    # Compare both attributes against "image" explicitly. Writing
    # ``get("type") or get("name") == "image"`` would count every meta tag
    # that merely has a non-empty ``type``, because ``==`` binds tighter
    # than ``or``.
    meta_image_count = sum(
        1
        for meta in soup.find_all("meta")
        if "image" in (meta.get("type"), meta.get("name"))
    )
    return image_tag_count + meta_image_count
|
|
|
|
| |
def number_of_option(soup):
    """Count the ``<option>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("option")``.
    """
    option_tags = soup.find_all("option")
    return len(option_tags)
|
|
|
|
| |
def number_of_list(soup):
    """Count the list-item (``<li>``) tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("li")``.
    """
    list_items = soup.find_all("li")
    return len(list_items)
|
|
|
|
| |
def number_of_TH(soup):
    """Count the table-header (``<th>``) tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("th")``.
    """
    header_cells = soup.find_all("th")
    return len(header_cells)
|
|
|
|
| |
def number_of_TR(soup):
    """Count the table-row (``<tr>``) tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("tr")``.
    """
    table_rows = soup.find_all("tr")
    return len(table_rows)
|
|
|
|
| |
def number_of_href(soup):
    """Count the ``<link>`` tags that carry a non-empty ``href`` attribute.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: Number of ``link`` tags whose ``href`` value is truthy.
    """
    return sum(1 for link_tag in soup.find_all("link") if link_tag.get("href"))
|
|
|
|
| |
def number_of_paragraph(soup):
    """Count the paragraph (``<p>``) tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("p")``.
    """
    paragraphs = soup.find_all("p")
    return len(paragraphs)
|
|
|
|
| |
def number_of_script(soup):
    """Count the ``<script>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("script")``.
    """
    script_tags = soup.find_all("script")
    return len(script_tags)
|
|
|
|
| |
def length_of_title(soup):
    """Return the number of characters in the document title.

    Args:
        soup: Parsed document exposing a ``title`` attribute.

    Returns:
        int: ``len(soup.title.text)``, or 0 when ``soup.title`` is ``None``.
    """
    title_tag = soup.title
    # Identity check: ``== None`` would go through the tag's own ``__eq__``
    # and is inconsistent with ``has_title``, which already uses ``is None``.
    if title_tag is None:
        return 0
    return len(title_tag.text)
|
|
|
|
| """ |
| print("has_title --> ", has_title(soup)) |
| print("has_input --> ", has_input(soup)) |
| print("has_button --> ", has_button(soup)) |
| print("has_image --> ", has_image(soup)) |
| print("has_submit --> ", has_submit(soup)) |
| print("has_link --> ", has_link(soup)) |
| print("has_password --> ", has_password(soup)) |
| print("has_email_input --> ", has_email_input(soup)) |
| print("has_hidden_element --> ", has_hidden_element(soup)) |
| print("has_audio --> ", has_audio(soup)) |
| print("has_video --> ", has_video(soup)) |
| print("number_of_inputs --> ", number_of_inputs(soup)) |
| print("number_of_buttons --> ", number_of_buttons(soup)) |
| print("number_of_images --> ", number_of_images(soup)) |
| print("number_of_option --> ", number_of_option(soup)) |
| print("number_of_list --> ", number_of_list(soup)) |
| print("number_of_TH --> ", number_of_TH(soup)) |
| print("number_of_TR --> ", number_of_TR(soup)) |
| print("number_of_href --> ", number_of_href(soup)) |
| print("number_of_paragraph --> ", number_of_paragraph(soup)) |
| print("number_of_script --> ", number_of_script(soup)) |
| print("length_of_title --> ", length_of_title(soup)) |
| |
| """ |
|
|
|
|
| |
def has_h1(soup):
    """Report whether the document contains any ``<h1>`` heading.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``h1`` tag is found, otherwise 0.
    """
    return 1 if soup.find_all("h1") else 0
|
|
|
|
| |
def has_h2(soup):
    """Report whether the document contains any ``<h2>`` heading.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``h2`` tag is found, otherwise 0.
    """
    headings = soup.find_all("h2")
    return int(bool(headings))
|
|
|
|
| |
def has_h3(soup):
    """Report whether the document contains any ``<h3>`` heading.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``h3`` tag is found, otherwise 0.
    """
    headings = soup.find_all("h3")
    if not headings:
        return 0
    return 1
|
|
|
|
| |
def length_of_text(soup):
    """Return the length of the text produced by ``soup.get_text()``.

    Args:
        soup: Parsed document exposing ``get_text``.

    Returns:
        int: Number of characters in the string returned by
        ``soup.get_text()``.
    """
    document_text = soup.get_text()
    return len(document_text)
|
|
|
|
| |
def number_of_clickable_button(soup):
    """Count the ``<button>`` tags whose ``type`` attribute is ``"button"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: Number of ``button`` tags with ``type`` equal to ``"button"``.
    """
    plain_buttons = [
        tag for tag in soup.find_all("button") if tag.get("type") == "button"
    ]
    return len(plain_buttons)
|
|
|
|
| |
def number_of_a(soup):
    """Count the anchor (``<a>``) tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("a")``.
    """
    anchors = soup.find_all("a")
    return len(anchors)
|
|
|
|
| |
def number_of_img(soup):
    """Count the ``<img>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("img")``.
    """
    img_tags = soup.find_all("img")
    return len(img_tags)
|
|
|
|
| |
def number_of_div(soup):
    """Count the ``<div>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("div")``.
    """
    div_tags = soup.find_all("div")
    return len(div_tags)
|
|
|
|
| |
def number_of_figure(soup):
    """Count the ``<figure>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("figure")``.
    """
    figures = soup.find_all("figure")
    return len(figures)
|
|
|
|
| |
def has_footer(soup):
    """Report whether the document contains any ``<footer>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``footer`` tag is found, otherwise 0.
    """
    return 1 if soup.find_all("footer") else 0
|
|
|
|
| |
def has_form(soup):
    """Report whether the document contains any ``<form>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``form`` tag is found, otherwise 0.
    """
    forms = soup.find_all("form")
    return int(bool(forms))
|
|
|
|
| |
def has_text_area(soup):
    """Report whether the document contains any ``<textarea>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``textarea`` tag is found, otherwise 0.
    """
    text_areas = soup.find_all("textarea")
    if text_areas:
        return 1
    return 0
|
|
|
|
| |
def has_iframe(soup):
    """Report whether the document contains any ``<iframe>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``iframe`` tag is found, otherwise 0.
    """
    return int(len(soup.find_all("iframe")) > 0)
|
|
|
|
| |
def has_text_input(soup):
    """Report whether any ``<input>`` tag has ``type="text"``.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag
            must support ``.get(attribute_name)``.

    Returns:
        int: 1 if some ``input`` tag's ``type`` equals ``"text"``,
        otherwise 0.
    """
    text_found = any(
        field.get("type") == "text" for field in soup.find_all("input")
    )
    return int(text_found)
|
|
|
|
| |
def number_of_meta(soup):
    """Count the ``<meta>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("meta")``.
    """
    meta_tags = soup.find_all("meta")
    return len(meta_tags)
|
|
|
|
| |
def has_nav(soup):
    """Report whether the document contains any ``<nav>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``nav`` tag is found, otherwise 0.
    """
    return 1 if soup.find_all("nav") else 0
|
|
|
|
| |
def has_object(soup):
    """Report whether the document contains any ``<object>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``object`` tag is found, otherwise 0.
    """
    object_tags = soup.find_all("object")
    return int(bool(object_tags))
|
|
|
|
| |
def has_picture(soup):
    """Report whether the document contains any ``<picture>`` tag.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: 1 if at least one ``picture`` tag is found, otherwise 0.
    """
    pictures = soup.find_all("picture")
    if not pictures:
        return 0
    return 1
|
|
|
|
| |
def number_of_sources(soup):
    """Count the ``<source>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("source")``.
    """
    source_tags = soup.find_all("source")
    return len(source_tags)
|
|
|
|
| |
def number_of_span(soup):
    """Count the ``<span>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("span")``.
    """
    span_tags = soup.find_all("span")
    return len(span_tags)
|
|
|
|
| |
def number_of_table(soup):
    """Count the ``<table>`` tags in the document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        int: Number of items returned by ``soup.find_all("table")``.
    """
    tables = soup.find_all("table")
    return len(tables)
|
|
|
|
|
|
|
|
|
|
|
|