falcon-hackathon / utils.py
Prudvireddy's picture
Upload 5 files
595c6a8 verified
# import requests
# from bs4 import BeautifulSoup
# import os
# import shutil
# import re
# import google.generativeai as genai
# def download_image(img_url, folder):
# try:
# response = requests.get(img_url, stream=True)
# filename = os.path.join(folder, img_url.split("/")[-1])
# with open(filename, 'wb') as file:
# for chunk in response.iter_content(1024):
# file.write(chunk)
# print(f"Downloaded {filename}")
# except Exception as e:
# print(f"Failed to download {img_url}. Reason: {e}")
# def is_logo(img_url):
# logo_keywords = ['logo', 'brand', 'icon', 'favicon']
# return any(keyword in img_url.lower() for keyword in logo_keywords)
# def extract_images(url, folder):
# if os.path.exists(folder):
# shutil.rmtree(folder)
# os.makedirs(folder)
# response = requests.get(url)
# soup = BeautifulSoup(response.text, 'html.parser')
# images = soup.find_all('img')
# img_urls = [img['src'] for img in images if 'src' in img.attrs]
# for img_url in img_urls:
# if img_url.startswith('http'):
# if not is_logo(img_url):
# download_image(img_url, folder)
# else:
# img_url = requests.compat.urljoin(url, img_url)
# if not is_logo(img_url):
# download_image(img_url, folder)
# def process_script(script):
# """Extract the first <title> and <description> tag contents from the script into a dictionary, using a placeholder string when a tag is missing."""
# dict = {}
# title_matches = re.findall(r'<title>(.*?)</title>', script, re.DOTALL)
# description_matches = re.findall(r'<description>(.*?)</description>', script, re.DOTALL)
# dict['title'] = title_matches[0] if title_matches else "No title found"
# dict['description'] = description_matches[0] if description_matches else "No description found"
# return dict
# def analyse_images(url):
# imgs_dicts = []
# NOTE: never commit API keys to source control; the key previously hard-coded here should be revoked.
# genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
# model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
# files = [ os.path.join('downloaded_images', file) for file in os.listdir('downloaded_images')]
# for img in files:
# sample_file = genai.upload_file(path=img)
# file = genai.get_file(name=sample_file.name)
# response = model.generate_content([sample_file, f"short description of the image from the website {url} and give a title for the image with title in <title> tag and description in <description> tag"])
# img_dict = process_script(response.text)
# img_dict['img_path'] = img
# imgs_dicts.append(img_dict)
# print(img_dict)
# return imgs_dicts