Spaces:
Sleeping
Sleeping
File size: 2,823 Bytes
595c6a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# import requests
# from bs4 import BeautifulSoup
# import os
# import shutil
# import re
# import google.generativeai as genai
# def download_image(img_url, folder):
#     try:
#         response = requests.get(img_url, stream=True)
#         filename = os.path.join(folder, img_url.split("/")[-1])
#         with open(filename, 'wb') as file:
#             for chunk in response.iter_content(1024):
#                 file.write(chunk)
#         print(f"Downloaded {filename}")
#     except Exception as e:
#         print(f"Failed to download {img_url}. Reason: {e}")
# def is_logo(img_url):
#     logo_keywords = ['logo', 'brand', 'icon', 'favicon']
#     return any(keyword in img_url.lower() for keyword in logo_keywords)
# def extract_images(url, folder):
#     if os.path.exists(folder):
#         shutil.rmtree(folder)
#     os.makedirs(folder)
#     response = requests.get(url)
#     soup = BeautifulSoup(response.text, 'html.parser')
#     images = soup.find_all('img')
#     img_urls = [img['src'] for img in images if 'src' in img.attrs]
#     for img_url in img_urls:
#         if img_url.startswith('http'):
#             if not is_logo(img_url):
#                 download_image(img_url, folder)
#         else:
#             img_url = requests.compat.urljoin(url, img_url)
#             if not is_logo(img_url):
#                 download_image(img_url, folder)
# def process_script(script):
#     """Used to process the script into dictionary format"""
#     dict = {}
#     title_matches = re.findall(r'<title>(.*?)</title>', script, re.DOTALL)
#     description_matches = re.findall(r'<description>(.*?)</description>', script, re.DOTALL)
#     dict['title'] = title_matches[0] if title_matches else "No title found"
#     dict['description'] = description_matches[0] if description_matches else "No description found"
#     return dict
# def analyse_images(url):
#     imgs_dicts = []
#     # NOTE(review): a literal API key was committed on this line; it should be revoked,
#     # and the key read from the environment rather than stored in source.
#     genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
#     model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
#     files = [ os.path.join('downloaded_images', file) for file in os.listdir('downloaded_images')]
#     for img in files:
#         sample_file = genai.upload_file(path=img)
#         file = genai.get_file(name=sample_file.name)
#         response = model.generate_content([sample_file, f"short description of the image from the website {url} and give a title for the image with title in <title> tag and description in <description> tag"])
#         img_dict = process_script(response.text)
#         img_dict['img_path'] = img
#         imgs_dicts.append(img_dict)
#         print(img_dict)
#     return imgs_dicts