Spaces:

Prudvireddy
/

falcon-hackathon

Sleeping

App Files Files Community

falcon-hackathon / utils.py

Prudvireddy

Upload 5 files

595c6a8 verified over 1 year ago

raw

history blame contribute delete

2.82 kB































	# import requests
	# from bs4 import BeautifulSoup
	# import os
	# import shutil
	# import re

	# import google.generativeai as genai

	# def download_image(img_url, folder):
	#     try:
	#         response = requests.get(img_url, stream=True)
	#         filename = os.path.join(folder, img_url.split("/")[-1])
	#         with open(filename, 'wb') as file:
	#             for chunk in response.iter_content(1024):
	#                 file.write(chunk)
	#         print(f"Downloaded {filename}")
	#     except Exception as e:
	#         print(f"Failed to download {img_url}. Reason: {e}")

	# def is_logo(img_url):
	#     logo_keywords = ['logo', 'brand', 'icon', 'favicon']
	#     return any(keyword in img_url.lower() for keyword in logo_keywords)

	# def extract_images(url, folder):
	#     if os.path.exists(folder):
	#         shutil.rmtree(folder)
	#     os.makedirs(folder)

	#     response = requests.get(url)
	#     soup = BeautifulSoup(response.text, 'html.parser')

	#     images = soup.find_all('img')
	#     img_urls = [img['src'] for img in images if 'src' in img.attrs]

	#     for img_url in img_urls:
	#         if img_url.startswith('http'):
	#             if not is_logo(img_url):
	#                 download_image(img_url, folder)
	#         else:
	#             img_url = requests.compat.urljoin(url, img_url)
	#             if not is_logo(img_url):
	#                 download_image(img_url, folder)


	# def process_script(script):
	#     """Parse the <title> and <description> tags in the script into a dictionary."""
	#     dict = {}
	#     title_matches = re.findall(r'<title>(.*?)</title>', script, re.DOTALL)
	#     description_matches = re.findall(r'<description>(.*?)</description>', script, re.DOTALL)
	#     dict['title'] = title_matches[0] if title_matches else "No title found"
	#     dict['description'] = description_matches[0] if description_matches else "No description found"
	#     return dict

	# def analyse_images(url):
	#     imgs_dicts = []

	#     # API key redacted: never commit credentials; read the key from the environment instead.
	#     genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

	#     model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

	#     files = [ os.path.join('downloaded_images', file) for file in os.listdir('downloaded_images')]

	#     for img in files:
	#         sample_file = genai.upload_file(path=img)
	#         file = genai.get_file(name=sample_file.name)
	#         response = model.generate_content([sample_file, f"short description of the image from the website {url} and give a title for the image with title in <title> tag and description in <description> tag"])
	#         img_dict = process_script(response.text)
	#         img_dict['img_path'] = img
	#         imgs_dicts.append(img_dict)
	#         print(img_dict)

	#     return imgs_dicts