dhanvanth183 commited on
Commit
267bd16
·
verified ·
1 Parent(s): e2ec2d4

Upload 16 files

Browse files
.env ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
+ # NOTE: real API keys were committed here; they are exposed in git history
+ # and must be revoked/rotated immediately. Keep only placeholders in VCS.
+ GROQ_API_KEY=your_groq_api_key_here
+ USER_AGENT=MyApp/1.0
+ GOOGLE_API_KEY=your_google_api_key_here
+ TAVILY_API_KEY=your_tavily_api_key_here
+ PLACES_API_KEY=your_places_api_key_here
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.jpg filter=lfs diff=lfs merge=lfs -text
2
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
3
+ *.png filter=lfs diff=lfs merge=lfs -text
+ S3_chromadb.zip filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.png
2
+ *.jpg
3
+ # Ignore local env files so API keys are never committed
4
+ *.env
Dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Restaurantguide
3
- emoji: 🌍
4
- colorFrom: green
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.9.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: AI_guide
3
+ app_file: main.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.3.0
 
 
6
  ---
 
 
S3Dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
S3_chromadb.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc56a182384b7d1e79fb83f54273d4290c88e47eab98d3f0ca6ff898c27d623
3
+ size 10180157
SRGAN.jpg ADDED

Git LFS Details

  • SHA256: cad24b2fc26760ded021cc9b3394225035ece6e7b1f0b1366946cf155ad70dec
  • Pointer size: 130 Bytes
  • Size of remote file: 59.8 kB
chromadblocal.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
from sentence_transformers import SentenceTransformer
import chromadb


class DataHandler:
    """Builds and maintains a persistent ChromaDB collection of restaurant embeddings."""

    # Default location of the restaurant dataset; overridable per instance.
    DEFAULT_DATASET_PATH = "D:/Projects/Liminal/AI_Guide/resources/S3Dataset.csv"

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2',
                 db_path="D:/Projects/Liminal/AI_Guide/resources/S3_chromadb",
                 dataset_path=DEFAULT_DATASET_PATH):
        """
        Initialize the ChromaDB persistent client and embedding model.

        :param model_name: SentenceTransformer model used to embed rows.
        :param db_path: Directory used for ChromaDB persistent storage.
        :param dataset_path: CSV used by get_unique_cities() and the CLI run.
        """
        self.client = chromadb.PersistentClient(path=db_path)  # ChromaDB with persistent storage
        self.model = SentenceTransformer(model_name)
        self.dataset_path = dataset_path
        self.collection = self.client.get_or_create_collection(name="food_places")

    def load_data(self, file_path):
        """
        Load data from a CSV file.

        :param file_path: Path to the CSV file.
        :return: DataFrame with loaded data.
        """
        return pd.read_csv(file_path)

    def get_unique_cities(self):
        """
        Retrieve unique cities from the restaurant dataset.

        :return: Sorted list of unique city names.
        """
        df = self.load_data(self.dataset_path)
        return sorted(df['City'].unique().tolist())

    def process_data(self, df):
        """
        Generate text embeddings for each row and upsert them into ChromaDB.

        Rows whose stored metadata is identical to the new metadata are
        skipped, so incremental re-runs only write changed rows.

        :param df: DataFrame containing restaurant data.
        """
        count = 0
        for idx, row in df.iterrows():
            # Embed the searchable attributes of the restaurant as one string.
            combined_text = f"{row['Cuisine']} {row['Location']} {row['Locality']} {row['Cost']}"
            embedding = self.model.encode(combined_text).tolist()

            new_metadata = {
                "restaurant_name": row['Name'],
                "location": row['Location'],
                "locality": row['Locality'],
                "city": row['City'],
                "votes": row['Votes'],
                "cost": row['Cost'],
                "rating": row['Rating'],
                "uuid": row['Unique_ID'],
                "image_path": row['Image_Path']
            }

            # Skip rows already stored with identical metadata to avoid
            # redundant writes on incremental runs.
            existing_docs = self.collection.get(ids=[str(idx)])
            if existing_docs['ids'] and existing_docs['metadatas'][0] == new_metadata:
                continue

            # Insert or update the collection.
            self.collection.add(
                documents=[combined_text],
                metadatas=[new_metadata],
                embeddings=[embedding],
                ids=[str(idx)]  # The DataFrame index doubles as the document ID.
            )
            count = count + 1
            if count % 100 == 0:
                print(f"Inserted/Updated {count} rows into ChromaDB.")

    def get_collection(self):
        """Return the ChromaDB collection instance."""
        return self.collection


# If you want to run this file separately to update the vector DB
if __name__ == "__main__":
    data_handler = DataHandler()
    df = data_handler.load_data(data_handler.dataset_path)
    data_handler.process_data(df)
main.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from chromadblocal import DataHandler
from queryhandler import QueryHandler

# Wire up data access: the DataHandler owns the ChromaDB collection and
# the QueryHandler runs retrieval plus LLM generation against it.
data_handler = DataHandler()
available_cities = data_handler.get_unique_cities()
query_handler = QueryHandler(data_handler.get_collection())


def process_query(user_prompt):
    """Run the user's prompt through retrieval and LLM generation.

    Returns the recommendation text and the list of image paths that the
    gallery component renders.
    """
    results = query_handler.query(user_prompt, n_results=5)
    recommendation_text, image_paths = query_handler.generate_response(results, user_prompt)
    return recommendation_text, image_paths


def display_greeting_and_cities():
    """Build the greeting shown above the prompt, listing every known city."""
    greeting = ("Hi there! Got questions about restaurants, cuisines, locations, ratings, or costs in these cities?"
                " Just ask, and I’ll be thrilled to help you find what you need!")
    return f"{greeting}\n\nAvailable cities: {', '.join(available_cities)}"


# Gradio interface: one textbox in, recommendation text plus image gallery out.
iface = gr.Interface(
    fn=process_query,
    inputs=gr.Textbox(label="Ask about restaurants, cuisines, or more"),
    outputs=[
        gr.Textbox(label="Recommended Restaurants"),
        gr.Gallery(label="Restaurant Images")
    ],
    title="Restaurant Query System",
    description=display_greeting_and_cities()
)

if __name__ == "__main__":
    # allowed_paths lets Gradio serve images stored outside its working dir.
    iface.launch(
        server_name="127.0.0.1",
        server_port=7860,
        share=True,
        allowed_paths=["D:/Projects/Liminal/AI_Guide/resources/uuid_images"]
    )
proposed_arch.jpg ADDED

Git LFS Details

  • SHA256: 230100077e73a8e69984f1e3f9ea9d90f7547882d95194ff79cd457b34e4448e
  • Pointer size: 131 Bytes
  • Size of remote file: 156 kB
queryhandler.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os
import re
from collections import deque

from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Filters returned when the LLM output cannot be parsed.
_EMPTY_FILTERS = {"cuisine_type": None, "location": None, "price": None}


class QueryHandler:
    """Retrieves restaurant matches from ChromaDB and generates LLM answers.

    Keeps a bounded history of (prompt, results, response) triples so that
    follow-up questions can reuse recent conversational context.
    """

    def __init__(self, collection, llm_model_name='llama-3.3-70b-versatile',
                 embed_model_name='sentence-transformers/all-MiniLM-L6-v2', max_history=5):
        """
        :param collection: ChromaDB collection holding restaurant embeddings.
        :param llm_model_name: Groq-hosted chat model used for generation.
        :param embed_model_name: SentenceTransformer model for query embeddings.
        :param max_history: Number of past interactions retained as context.
        :raises ValueError: If GROQ_API_KEY is not set in the environment.
        """
        self.collection = collection
        self.embed_model = SentenceTransformer(embed_model_name)

        # Load the Groq API key from environment variable.
        self.groq_api_key = os.getenv('GROQ_API_KEY')
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY environment variable not set.")

        # Initialize the LLM client with the API key.
        self.llm = ChatGroq(groq_api_key=self.groq_api_key, model_name=llm_model_name)

        # Session history management: stores the last `max_history` interactions.
        self.history = deque(maxlen=max_history)

    def extract_filters(self, user_prompt):
        """
        Use the LLM to extract filter criteria from the user prompt.

        :return: Dict with keys cuisine_type/location/price; None for any
                 value that is missing or unparseable.
        """
        extraction_prompt = ChatPromptTemplate.from_template(
            """
            Analyze the following query and extract filters for restaurant recommendations:
            - Cuisine type (if mentioned): Look for specific cuisine preferences.
            - Location: Identify the city or locality.
            - Price range: Look for a maximum or minimum price (e.g., $100).

            If any of these details are missing, return them as None.
            Query: {input}
            Provide the extracted filters in JSON format like:
            {{
                "cuisine_type": "string or None",
                "location": "string or None",
                "price": "int or None"
            }}
            """
        )
        prompt = extraction_prompt.format(input=user_prompt)
        response = self.llm.invoke(prompt)

        # SECURITY: never eval() LLM output -- it is untrusted text that could
        # contain arbitrary code. Locate the first JSON object in the reply
        # (models often wrap it in prose/code fences) and parse it safely.
        match = re.search(r'\{.*\}', response.content, re.DOTALL)
        if not match:
            return dict(_EMPTY_FILTERS)
        try:
            parsed = json.loads(match.group(0))
        except (json.JSONDecodeError, TypeError):
            return dict(_EMPTY_FILTERS)
        # Normalize the literal string "None" (suggested by the prompt) to None
        # and restrict the result to the expected keys.
        return {key: (None if parsed.get(key) in (None, "None") else parsed.get(key))
                for key in _EMPTY_FILTERS}

    def query(self, user_prompt, n_results=5):
        """
        Process the user query and retrieve filtered results from ChromaDB.

        :param user_prompt: Free-text user query.
        :param n_results: Maximum number of matches to return.
        :return: Raw ChromaDB query result dict.
        """
        # Generate embedding for the user prompt.
        query_embedding = self.embed_model.encode(user_prompt).tolist()

        # Extract filters using the LLM.
        filters = self.extract_filters(user_prompt)

        # Create the metadata filter dictionary for ChromaDB.
        chromadb_filter = {}
        if filters.get("cuisine_type"):
            chromadb_filter["cuisine_type"] = filters["cuisine_type"]
        if filters.get("location"):
            chromadb_filter["location"] = filters["location"]
        if filters.get("price"):
            chromadb_filter["price"] = {"$lte": filters["price"]}

        # Perform the query in ChromaDB with filters.
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            where=chromadb_filter
        )

        return results

    def get_recent_context(self):
        """Format recent history (queries, results, LLM replies) as one string."""
        context_parts = []
        for idx, (prompt, results, llm_response) in enumerate(self.history):
            metadatas = results.get('metadatas', [])
            # ChromaDB nests metadata one list per query; flatten to one list.
            flat_metadatas = [item for sublist in metadatas for item in sublist]
            context = "\n".join(
                f"{idx + 1}. Restaurant: {metadata.get('restaurant_name', 'N/A')}, "
                f"Location: {metadata.get('location', 'N/A')}, "
                f"Locality: {metadata.get('locality', 'N/A')}, "
                f"City: {metadata.get('city', 'N/A')}, "
                f"Votes: {metadata.get('votes', 'N/A')}, "
                f"Cost: {metadata.get('cost', 'N/A')}, "
                f"Rating: {metadata.get('rating', 'N/A')}"
                for metadata in flat_metadatas
            )
            # Include the LLM-generated response for each entry.
            context_parts.append(f"Query: {prompt}\nResults:\n{context}\nResponse:\n{llm_response}")

        return "\n".join(context_parts)

    def generate_response(self, results, user_prompt):
        """
        Generate the final LLM answer for a query.

        :param results: Raw ChromaDB result dict from query().
        :param user_prompt: The original user query.
        :return: Tuple (response text, list of image paths).
        """
        # Gather recent context from history.
        recent_context = self.get_recent_context()

        # Current query's results, flattened (one metadata list per query).
        metadatas = results.get('metadatas', [])
        flat_metadatas = [item for sublist in metadatas for item in sublist]

        # Construct recommendation text and collect image paths.
        recommendation_text = ""
        image_paths = []  # Image paths for rendering in the gallery.
        for metadata in flat_metadatas:
            recommendation_text += (
                f"Restaurant: {metadata.get('restaurant_name', 'N/A')}, "
                f"Location: {metadata.get('location', 'N/A')}, "
                f"Locality: {metadata.get('locality', 'N/A')}, "
                f"City: {metadata.get('city', 'N/A')}, "
                f"Votes: {metadata.get('votes', 'N/A')}, "
                f"Cost: {metadata.get('cost', 'N/A')}, "
                f"Rating: {metadata.get('rating', 'N/A')}\n"
            )
            if metadata.get('image_path'):  # Append valid image paths only.
                image_paths.append(metadata['image_path'])

        # LLM prompt template with session context.
        prompt_template = ChatPromptTemplate.from_template(
            """
            Previous Conversations:
            {recent_context}

            Here is the data relevant to your query:

            - **Name**: The restaurant's name.
            - **Location, Locality, and City**: The address details of the restaurant.
            - **Cuisine**: Types of cuisines offered by the restaurant.
            - **Rating**: Average rating on a scale of 5.
            - **Votes**: Number of people who have rated the restaurant.
            - **Cost**: Average cost of dining.

            You are a restaurant recommender with knowledge of restaurants, cuisines, ratings, and costs across various cities in India. Respond to queries based on the provided details and recommend the most relevant options based on user preferences. If information is incomplete, provide the best suggestions and encourage follow-up questions.

            Use the recent context only if you require additional information from previous conversations. If the query is not related to the information available to you, make a generic response and let the user know you would be more than happy to help with restaurant recommendations.

            <context>
            {context}
            </context>
            Question: {input}
            """
        )

        # Generate the prompt by formatting the template.
        prompt = prompt_template.format(recent_context=recent_context, context=recommendation_text, input=user_prompt)

        # Use the LLM to generate the output.
        response = self.llm.invoke(prompt)

        # Store the prompt, results, and LLM-generated response in the history.
        self.history.append((user_prompt, results, response.content))

        return response.content, image_paths
requirements.txt ADDED
Binary file (652 Bytes). View file
 
restaurant_uuid_images.csv ADDED
The diff for this file is too large to render. See raw diff
 
s3bucket_pathadder.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import os

# Load the source dataset that still carries local image paths.
file_path = "D:/Projects/Liminal/AI_Guide/resources/Dataset.csv"  # Input CSV file
df = pd.read_csv(file_path)

# Define S3 bucket details.
bucket_name = "restaurant-resources-liminal"
region = "ap-south-1"
s3_base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/uuid_images/"

# Every image was uploaded to S3 as <Unique_ID>.png, so the public URL can be
# derived directly from the Unique_ID column.
df['S3Image_path'] = df['Unique_ID'].apply(lambda x: f"{s3_base_url}{x}.png")

# Save the updated DataFrame to a new CSV.
output_file_path = 'D:/Projects/Liminal/AI_Guide/resources/S3Dataset.csv'
df.to_csv(output_file_path, index=False)

print(f"Updated CSV file saved at: {output_file_path}")
webbaseloader.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import requests
import pandas as pd
import os
from dotenv import load_dotenv
import time

# Load environment variables from .env file
load_dotenv()

# Your Google API Key (None if PLACES_API_KEY is absent from the environment)
API_KEY = os.getenv('PLACES_API_KEY')

# File paths
input_csv = "D:/Projects/Liminal/AI_Guide/resources/restaurants_1.csv"  # Input CSV file
output_dir = 'D:/Projects/Liminal/AI_Guide/resources/restaurant_images'  # Directory to save images
PHOTO_BASE_URL = "https://maps.googleapis.com/maps/api/place/photo"  # Place Photos endpoint
output_csv = "D:/Projects/Liminal/AI_Guide/resources/output.csv"  # Output CSV with image paths
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
def get_place_id(query, api_key):
    """
    Search for a place using the Google Places Text Search API.

    :param query: Free-text search string (name plus address details).
    :param api_key: Google Places API key.
    :return: The first matching place_id, or None when nothing is found
             or the request fails.
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        'query': query,
        'key': api_key
    }
    try:
        # Timeout keeps a stalled request from hanging the whole batch run.
        response = requests.get(url, params=params, timeout=10)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Request failed for query: {query} ({e})")
        return None

    if data.get('status') == 'OK' and data.get('results'):
        return data['results'][0]['place_id']
    print(f"Place not found for query: {query}")
    return None
def get_photo_reference(place_id, api_key):
    """
    Get the first photo reference for a place via the Place Details API.

    :param place_id: Google place_id returned by get_place_id().
    :param api_key: Google Places API key.
    :return: photo_reference string, or None when the place has no photos
             or the request fails.
    """
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        'place_id': place_id,
        'key': api_key
    }
    try:
        # Timeout keeps a stalled request from hanging the whole batch run.
        response = requests.get(url, params=params, timeout=10)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Request failed for place_id: {place_id} ({e})")
        return None

    # .get() avoids a KeyError when 'result' is absent on non-OK statuses.
    result = data.get('result', {})
    if data.get('status') == 'OK' and result.get('photos'):
        return result['photos'][0]['photo_reference']
    print(f"No photos found for place_id: {place_id}")
    return None
def download_photo(photo_reference, api_key, output_path, retries=3, delay=30):
    """
    Download and save a place photo, retrying on transient network errors.

    :param photo_reference: Photo reference from the Place Details API.
    :param api_key: Google Places API key.
    :param output_path: Destination file path for the image.
    :param retries: Maximum number of download attempts.
    :param delay: Seconds to wait between attempts.
    :return: True if the image was saved, False otherwise.
    """
    params = {
        'photoreference': photo_reference,
        'key': api_key,
        'maxwidth': 400,  # Requested image resolution
        'maxheight': 400
    }

    for attempt in range(1, retries + 1):
        try:
            # Reuse the module-level endpoint constant instead of re-hard-coding it.
            response = requests.get(PHOTO_BASE_URL, params=params, stream=True, timeout=10)
            if response.status_code == 200:
                with open(output_path, 'wb') as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
                print(f"Image saved to {output_path}")
                return True
            else:
                print(f"Failed to download image: {response.status_code} - {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading photo (Attempt {attempt}/{retries}): {e}")

        # Wait before retrying.
        if attempt < retries:
            print(f"Retrying in {delay} seconds...")
            time.sleep(delay)

    print(f"Failed to download image after {retries} attempts.")
    return False
def main():
    """
    Drive the pipeline: read the input CSV, resolve each restaurant to a
    place_id, fetch a photo reference, download the image, and record the
    saved path back into the output CSV.
    """
    df = pd.read_csv(input_csv)

    # Bail out early unless every column the search query needs is present.
    required_columns = ['Name', 'Location', 'Locality', 'City']
    for col in required_columns:
        if col not in df.columns:
            print(f"CSV file must contain the following columns: {', '.join(required_columns)}")
            return

    if 'Image_Path' not in df.columns:
        df['Image_Path'] = None

    # Base names (without extension) of images already on disk, so reruns
    # skip restaurants that were downloaded previously.
    existing_images = {os.path.splitext(entry)[0] for entry in os.listdir(output_dir)
                       if os.path.isfile(os.path.join(output_dir, entry))}

    for index, row in df.iterrows():
        # File-system-safe name: every non-alphanumeric character becomes "_".
        name = row['Name']
        safe_name = "".join(ch if ch.isalnum() else "_" for ch in name)

        if safe_name in existing_images:
            print(f"Image already exists for {row['Name']}, skipping...")
            continue

        # Detailed search query improves the text-search hit rate.
        query = f"{name}, {row['Location']}, {row['Locality']}, {row['City']}"
        print(f"Processing: {query}")

        # Step 1: resolve the restaurant to a place_id.
        place_id = get_place_id(query, API_KEY)
        if not place_id:
            continue

        # Step 2: fetch a photo reference for that place.
        photo_reference = get_photo_reference(place_id, API_KEY)
        if not photo_reference:
            continue

        # Step 3: download the photo and remember where it was saved.
        output_path = os.path.join(output_dir, f"{safe_name}.jpg")
        if download_photo(photo_reference, API_KEY, output_path):
            df.at[index, 'Image_Path'] = output_path

    # Persist the updated DataFrame once the whole sweep is done.
    df.to_csv(output_csv, index=False)
    print(f"Updated CSV saved to {output_csv}")


if __name__ == "__main__":
    main()