File size: 2,823 Bytes
595c6a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102






























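# NOTE: every statement in this module is currently commented out, so
# importing it defines nothing. Uncomment the imports and functions below to
# re-enable the image-scraping and Gemini image-description helpers.
# import requests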
# from bs4 import BeautifulSoup
# import os
# import shutil
# import re

# import google.generativeai as genai

# def download_image(img_url, folder):
#     try:
#         response = requests.get(img_url, stream=True)
#         filename = os.path.join(folder, img_url.split("/")[-1])
#         with open(filename, 'wb') as file:
#             for chunk in response.iter_content(1024):
#                 file.write(chunk)
#         print(f"Downloaded {filename}")
#     except Exception as e:
#         print(f"Failed to download {img_url}. Reason: {e}")

# def is_logo(img_url):
#     logo_keywords = ['logo', 'brand', 'icon', 'favicon']
#     return any(keyword in img_url.lower() for keyword in logo_keywords)

# def extract_images(url, folder):
#     if os.path.exists(folder):
#         shutil.rmtree(folder)
#     os.makedirs(folder)

#     response = requests.get(url)
#     soup = BeautifulSoup(response.text, 'html.parser')
    
#     images = soup.find_all('img')
#     img_urls = [img['src'] for img in images if 'src' in img.attrs]
    
#     for img_url in img_urls:
#         if img_url.startswith('http'):
#             if not is_logo(img_url):
#                 download_image(img_url, folder)
#         else:
#             img_url = requests.compat.urljoin(url, img_url)
#             if not is_logo(img_url):
#                 download_image(img_url, folder)


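# def process_script(script):
#     """Extract the first <title> and <description> tag contents from the model response text into a dict, using "No title found" / "No description found" when a tag is missing."""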
#     dict = {}
#     title_matches = re.findall(r'<title>(.*?)</title>', script, re.DOTALL)
#     description_matches = re.findall(r'<description>(.*?)</description>', script, re.DOTALL)
#     dict['title'] = title_matches[0] if title_matches else "No title found"
#     dict['description'] = description_matches[0] if description_matches else "No description found"
#     return dict

# def analyse_images(url):
#     imgs_dicts = []

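#     # NOTE: never commit API keys. The key that was hard-coded here has been
#     # exposed in version control and should be revoked; read it from the
#     # environment instead.
#     genai.configure(api_key=os.environ["GOOGLE_API_KEY"])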

#     model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

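#     # NOTE(review): the folder name is hard-coded here, while extract_images()
#     # takes the destination folder as a parameter — keep the two in sync.
#     files = [ os.path.join('downloaded_images', file) for file in os.listdir('downloaded_images')]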

#     for img in files:
#         sample_file = genai.upload_file(path=img)
#         file = genai.get_file(name=sample_file.name)
#         response = model.generate_content([sample_file, f"short description of the image from the website {url} and give a title for the image with title in <title> tag and description in <description> tag"])
#         img_dict = process_script(response.text)
#         img_dict['img_path'] = img
#         imgs_dicts.append(img_dict)
#         print(img_dict)

#     return imgs_dicts