Spaces:
Sleeping
Sleeping
import os
import re
import shutil
from urllib.parse import urlparse

import google.generativeai as genai
import requests
from bs4 import BeautifulSoup
def download_image(img_url, folder):
    """Download one image into ``folder`` and report the outcome on stdout.

    The local file name is the last segment of the URL *path*, so a query
    string or fragment never becomes part of the name; a URL whose path has
    no final segment is saved as ``image``.

    This is best effort: network, HTTP-status, URL-parsing and file-system
    failures are printed and swallowed instead of being raised.

    Args:
        img_url: Absolute URL of the image to fetch.
        folder: Existing directory the file is written into.
    """
    try:
        # Only the path component names the file: ".../b.jpg?w=200" -> "b.jpg".
        name = os.path.basename(urlparse(img_url).path) or "image"
        filename = os.path.join(folder, name)
        # The context manager releases the streamed connection even on error.
        with requests.get(img_url, stream=True, timeout=30) as response:
            # Do not save an HTML error page under an image file name.
            response.raise_for_status()
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except (requests.RequestException, OSError, ValueError) as e:
        print(f"Failed to download {img_url}. Reason: {e}")
    else:
        print(f"Downloaded {filename}")
def is_logo(img_url):
    """Return True when the URL's path or query hints at a logo or icon.

    The host name is deliberately ignored: matching it would classify every
    image of a site whose domain happens to contain a keyword (for example
    ``brandcdn.example.com``) as a logo.

    Args:
        img_url: Image URL, absolute or relative.

    Returns:
        True if any of ``logo``, ``brand``, ``icon`` or ``favicon`` occurs,
        case-insensitively, in the path or query string; otherwise False.
    """
    logo_keywords = ('logo', 'brand', 'icon', 'favicon')
    try:
        parts = urlparse(img_url)
        haystack = f"{parts.path}?{parts.query}".lower()
    except ValueError:
        # Unparseable URL: fall back to scanning the raw string.
        haystack = img_url.lower()
    return any(keyword in haystack for keyword in logo_keywords)
def extract_images(url, folder):
    """Download every non-logo ``<img>`` of the page at ``url`` into ``folder``.

    ``folder`` is deleted and recreated first, so it only ever holds images
    from the most recent call. Each ``src`` is resolved against ``url``;
    sources that are empty, do not resolve to an http(s) URL (for example
    ``data:`` URIs), or repeat an earlier URL are skipped. Remaining URLs
    are filtered through ``is_logo`` and fetched with ``download_image``.

    Args:
        url: Address of the HTML page to scan.
        folder: Directory to (re)create and fill with the downloaded images.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an HTTP error status.
    """
    # Start from an empty folder so images of a previous page never mix in.
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.makedirs(folder)

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    seen = set()
    for img in soup.find_all('img'):
        src = (img.get('src') or '').strip()
        if not src:
            continue
        # urljoin leaves absolute URLs alone and resolves relative ones.
        img_url = requests.compat.urljoin(url, src)
        if not img_url.startswith(('http://', 'https://')):
            continue
        if img_url in seen:
            continue
        seen.add(img_url)
        if not is_logo(img_url):
            download_image(img_url, folder)
def _first_tag_text(script, tag, default):
    """Return the stripped text of the first ``<tag>...</tag>`` pair, or ``default``."""
    match = re.search(rf'<{tag}>(.*?)</{tag}>', script, re.DOTALL | re.IGNORECASE)
    return match.group(1).strip() if match else default


def process_script(script):
    """Extract the title and description from a tagged text response.

    Args:
        script: Text expected to contain ``<title>...</title>`` and
            ``<description>...</description>`` sections. ``None`` or an
            empty string is treated as text with no tags.

    Returns:
        A dict with the keys ``'title'`` and ``'description'``. Each value is
        the whitespace-stripped content of the first matching tag pair
        (tag names match case-insensitively, content may span lines), or
        ``"No title found"`` / ``"No description found"`` when absent.
    """
    script = script or ""
    return {
        'title': _first_tag_text(script, 'title', "No title found"),
        'description': _first_tag_text(script, 'description', "No description found"),
    }
def analyse_images(url, folder='downloaded_images'):
    """Ask the Gemini model for a title and description of each image in ``folder``.

    Every regular file in ``folder`` is uploaded with ``genai.upload_file``
    and sent to the model together with a prompt that names ``url`` and
    requests ``<title>`` / ``<description>`` tags; the reply is parsed with
    ``process_script``. Files are handled in sorted name order and each
    result dict is printed as it is produced.

    Args:
        url: Address of the website the images came from; it is only
            interpolated into the prompt.
        folder: Directory holding the images to analyse.

    Returns:
        A list with one dict per image, holding ``'title'``,
        ``'description'`` and ``'img_path'``.

    Raises:
        RuntimeError: If the ``GOOGLE_API_KEY`` environment variable is unset
            or empty.
    """
    # The key comes from the environment so no secret lives in the source.
    api_key = os.environ.get('GOOGLE_API_KEY')
    if not api_key:
        raise RuntimeError(
            "Set the GOOGLE_API_KEY environment variable before calling analyse_images()."
        )
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
    prompt = f"short description of the image from the website {url} and give a title for the image with title in <title> tag and description in <description> tag"

    imgs_dicts = []
    # Sorted so the order of results does not depend on directory listing order.
    for name in sorted(os.listdir(folder)):
        img = os.path.join(folder, name)
        if not os.path.isfile(img):
            continue
        sample_file = genai.upload_file(path=img)
        response = model.generate_content([sample_file, prompt])
        img_dict = process_script(response.text)
        img_dict['img_path'] = img
        imgs_dicts.append(img_dict)
        print(img_dict)
    return imgs_dicts