Spaces:
Runtime error
Runtime error
Upload 16 files
Browse files- .env +6 -0
- .gitattributes +3 -35
- .gitignore +4 -0
- Dataset.csv +0 -0
- README.md +3 -9
- S3Dataset.csv +0 -0
- S3_chromadb.zip +3 -0
- SRGAN.jpg +3 -0
- chromadblocal.py +78 -0
- main.py +48 -0
- proposed_arch.jpg +3 -0
- queryhandler.py +166 -0
- requirements.txt +0 -0
- restaurant_uuid_images.csv +0 -0
- s3bucket_pathadder.py +36 -0
- webbaseloader.py +159 -0
.env
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GROQ_API_KEY = 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
|
| 2 |
+
#GROQ_API_KEY=your_groq_api_key_here
|
| 3 |
+
USER_AGENT=MyApp/1.0
|
| 4 |
+
GOOGLE_API_KEY = "AIzaSyAmgPXlA9KIons_kvhZSJKEmAxunTc8cTM"
|
| 5 |
+
TAVILY_API_KEY = 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
|
| 6 |
+
PLACES_API_KEY = AIzaSyBOn9th7l3IskAG-BnDOBpjDmVWB3L7jUg
|
.gitattributes
CHANGED
|
@@ -1,35 +1,3 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.
|
| 3 |
-
*.
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.png filter=lfs diff=lfs merge=lfs -textS3_chromadb.zip filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.png
|
| 2 |
+
*.jpg
|
| 3 |
+
# Ignore env files so API keys are never committed. NOTE: a .env containing real keys was added in this same commit, so those keys are already exposed — revoke/rotate them and remove the file from the repository history; this ignore rule only prevents future commits.
|
| 4 |
+
*.env
|
Dataset.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: AI_guide
|
| 3 |
+
app_file: main.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
+
sdk_version: 5.3.0
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
S3Dataset.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
S3_chromadb.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cc56a182384b7d1e79fb83f54273d4290c88e47eab98d3f0ca6ff898c27d623
|
| 3 |
+
size 10180157
|
SRGAN.jpg
ADDED
|
Git LFS Details
|
chromadblocal.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
import chromadb
|
| 4 |
+
|
| 5 |
+
class DataHandler:
    """Build and expose the restaurant vector store.

    Loads restaurant rows from CSV, embeds a text summary of each row with a
    SentenceTransformer, and persists them in a local ChromaDB collection
    named ``food_places``.
    """

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2',
                 db_path="D:/Projects/Liminal/AI_Guide/resources/S3_chromadb",
                 dataset_path="D:/Projects/Liminal/AI_Guide/resources/S3Dataset.csv"):
        """
        :param model_name: SentenceTransformer model used to embed row text.
        :param db_path: Directory for ChromaDB persistent storage.
        :param dataset_path: CSV with the restaurant dataset (new parameter,
            defaults to the path previously hard-coded in get_unique_cities,
            so existing callers are unaffected).
        """
        # Persistent client so embeddings survive process restarts.
        self.client = chromadb.PersistentClient(path=db_path)
        self.model = SentenceTransformer(model_name)
        self.collection = self.client.get_or_create_collection(name="food_places")
        self.dataset_path = dataset_path

    def load_data(self, file_path):
        """
        Load data from a CSV file.

        :param file_path: Path to the CSV file.
        :return: DataFrame with loaded data.
        """
        return pd.read_csv(file_path)

    def get_unique_cities(self):
        """
        Retrieve the sorted list of unique cities in the restaurant dataset.

        :return: Sorted list of unique city names.
        """
        df = self.load_data(self.dataset_path)
        return sorted(df['City'].unique().tolist())

    def process_data(self, df):
        """
        Upsert every DataFrame row into the ChromaDB collection.

        The row index is used as the document ID. Rows whose metadata is
        unchanged are skipped; for those, no embedding is computed at all
        (the skip-check runs before the encode call, which is the expensive
        step).

        :param df: DataFrame containing restaurant data with the columns
            referenced below (Cuisine, Location, Locality, City, ...).
        """
        count = 0
        for idx, row in df.iterrows():
            # Metadata mirror of the row; compared against the stored copy
            # to detect whether an update is needed.
            new_metadata = {
                "restaurant_name": row['Name'],
                "location": row['Location'],
                "locality": row['Locality'],
                "city": row['City'],
                "votes": row['Votes'],
                "cost": row['Cost'],
                "rating": row['Rating'],
                "uuid": row['Unique_ID'],
                "image_path": row['Image_Path']
            }

            # Skip unchanged rows BEFORE paying for an embedding.
            existing_docs = self.collection.get(ids=[str(idx)])
            if existing_docs['ids'] and existing_docs['metadatas'][0] == new_metadata:
                continue

            # Text that drives semantic search (cuisine + address + cost).
            combined_text = f"{row['Cuisine']} {row['Location']} {row['Locality']} {row['Cost']}"
            embedding = self.model.encode(combined_text).tolist()

            self.collection.add(
                documents=[combined_text],
                metadatas=[new_metadata],
                embeddings=[embedding],
                ids=[str(idx)]  # row index doubles as the document ID
            )
            count += 1
            if count % 100 == 0:
                print(f"Inserted/Updated {count} rows into ChromaDB.")

    def get_collection(self):
        """Return the ChromaDB collection instance."""
        return self.collection
|
| 73 |
+
|
| 74 |
+
# Allow running this module directly to (re)build the vector database.
if __name__ == "__main__":
    handler = DataHandler()
    dataset = handler.load_data("D:/Projects/Liminal/AI_Guide/resources/S3Dataset.csv")
    handler.process_data(dataset)
|
main.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from chromadblocal import DataHandler
|
| 3 |
+
from queryhandler import QueryHandler
|
| 4 |
+
|
| 5 |
+
# Build the data layer once at import time: the persistent ChromaDB
# collection plus the city list shown in the greeting.
data_handler = DataHandler()
available_cities = data_handler.get_unique_cities()  # sorted unique cities from the dataset CSV
# Query layer: embedding model + LLM on top of the same collection.
query_handler = QueryHandler(data_handler.get_collection())
|
| 10 |
+
|
| 11 |
+
# Gradio callback: retrieval + LLM answer for one user prompt.
def process_query(user_prompt):
    """Return (recommendation text, image paths) for the given prompt."""
    # Retrieve the top matches from the vector store.
    top_matches = query_handler.query(user_prompt, n_results=5)
    # Let the LLM turn the matches into a conversational answer.
    answer, images = query_handler.generate_response(top_matches, user_prompt)
    # Gradio renders the text in a Textbox and the paths in a Gallery.
    return answer, images
|
| 21 |
+
|
| 22 |
+
# Intro banner shown above the input box (greeting + supported cities).
def display_greeting_and_cities():
    """Compose the greeting message followed by the list of available cities."""
    greeting = ("Hi there! Got questions about restaurants, cuisines, locations, ratings, or costs in these cities?"
                " Just ask, and I’ll be thrilled to help you find what you need!")
    return f"{greeting}\n\nAvailable cities: {', '.join(available_cities)}"
|
| 28 |
+
|
| 29 |
+
# Gradio UI: one textbox in, recommendation text + image gallery out.
iface = gr.Interface(
    fn=process_query,  # handler invoked on each submitted prompt
    inputs=gr.Textbox(label="Ask about restaurants, cuisines, or more"),
    outputs=[
        gr.Textbox(label="Recommended Restaurants"),  # LLM-generated recommendation text
        gr.Gallery(label="Restaurant Images")  # rendered from the image paths the handler returns
    ],
    title="Restaurant Query System",
    description=display_greeting_and_cities()  # greeting + city list, computed once at startup
)
|
| 40 |
+
|
| 41 |
+
# Start the app. share=True also exposes a public *.gradio.live URL.
if __name__ == "__main__":
    iface.launch(
        server_name="127.0.0.1",
        server_port=7860,
        share=True,
        allowed_paths=["D:/Projects/Liminal/AI_Guide/resources/uuid_images"]  # let Gradio serve local image files from this directory
    )
|
proposed_arch.jpg
ADDED
|
Git LFS Details
|
queryhandler.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
from langchain_groq import ChatGroq
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
import os
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from collections import deque
|
| 7 |
+
|
| 8 |
+
# Load environment variables from .env file
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class QueryHandler:
    """Answer restaurant queries over a ChromaDB collection.

    Pipeline: extract structured filters from the prompt with the LLM,
    run a filtered semantic search against the collection, then have the
    LLM compose a conversational answer using recent session history.
    """

    def __init__(self, collection, llm_model_name='llama-3.3-70b-versatile',
                 embed_model_name='sentence-transformers/all-MiniLM-L6-v2', max_history=5):
        """
        :param collection: ChromaDB collection of restaurant documents/metadata.
        :param llm_model_name: Groq-hosted chat model name.
        :param embed_model_name: SentenceTransformer used to embed queries
            (must match the model used to build the collection).
        :param max_history: number of past interactions kept for context.
        :raises ValueError: if GROQ_API_KEY is not set in the environment.
        """
        self.collection = collection
        self.embed_model = SentenceTransformer(embed_model_name)

        # The Groq key must come from the environment (.env is loaded at import).
        self.groq_api_key = os.getenv('GROQ_API_KEY')
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY environment variable not set.")

        self.llm = ChatGroq(groq_api_key=self.groq_api_key, model_name=llm_model_name)

        # Bounded session history of (prompt, results, llm_response) tuples.
        self.history = deque(maxlen=max_history)

    def extract_filters(self, user_prompt):
        """
        Use the LLM to extract filter criteria from the user prompt.

        :return: dict with keys cuisine_type / location / price; each value
            is None when the prompt did not specify it or extraction failed.
        """
        import json  # local import; only this method needs it

        extraction_prompt = ChatPromptTemplate.from_template(
            """
            Analyze the following query and extract filters for restaurant recommendations:
            - Cuisine type (if mentioned): Look for specific cuisine preferences.
            - Location: Identify the city or locality.
            - Price range: Look for a maximum or minimum price (e.g., $100).

            If any of these details are missing, return them as None.
            Query: {input}
            Provide the extracted filters in JSON format like:
            {{
                "cuisine_type": "string or None",
                "location": "string or None",
                "price": "int or None"
            }}
            """
        )
        prompt = extraction_prompt.format(input=user_prompt)
        response = self.llm.invoke(prompt)

        default = {"cuisine_type": None, "location": None, "price": None}
        try:
            # SECURITY: parse the model output as JSON instead of eval()'ing it.
            # eval() would execute arbitrary code contained in the LLM response.
            parsed = json.loads(response.content)
        except (ValueError, TypeError):
            return default
        if not isinstance(parsed, dict):
            return default
        # Normalise the LLM's "None"-as-string answers to real None.
        return {key: (None if parsed.get(key) in (None, "None", "") else parsed.get(key))
                for key in default}

    def query(self, user_prompt, n_results=5):
        """
        Embed the prompt, apply extracted metadata filters, and query ChromaDB.

        :return: raw ChromaDB query result dict.
        """
        query_embedding = self.embed_model.encode(user_prompt).tolist()
        filters = self.extract_filters(user_prompt)

        # Metadata conditions. The rows written by chromadblocal.py carry the
        # keys location/locality/city/cost/... — there is no 'cuisine_type'
        # key, so cuisine preferences are left to the semantic search over the
        # embedded document text (which includes the cuisine).
        conditions = []
        if filters.get("location"):
            conditions.append({"location": filters["location"]})
        price = filters.get("price")
        if price is not None:
            try:
                # Price maps onto the 'cost' metadata field.
                # NOTE(review): assumes 'cost' is stored as a number — confirm
                # against the dataset; a non-numeric LLM price is skipped.
                conditions.append({"cost": {"$lte": int(price)}})
            except (TypeError, ValueError):
                pass

        # Chroma requires an explicit $and when combining multiple conditions.
        if len(conditions) > 1:
            chromadb_filter = {"$and": conditions}
        elif conditions:
            chromadb_filter = conditions[0]
        else:
            chromadb_filter = {}

        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            where=chromadb_filter
        )
        return results

    def get_recent_context(self):
        """Format stored history (prompt, results, response) into a text block for the LLM."""
        context_parts = []
        for prompt, results, llm_response in self.history:
            metadatas = results.get('metadatas', [])
            # Chroma returns one metadata list per query embedding; flatten.
            flat_metadatas = [item for sublist in metadatas for item in sublist]
            # Number each restaurant within this history entry (the original
            # code reused the history index here, numbering every row alike).
            context = "\n".join(
                f"{i + 1}. Restaurant: {metadata.get('restaurant_name', 'N/A')}, "
                f"Location: {metadata.get('location', 'N/A')}, "
                f"Locality: {metadata.get('locality', 'N/A')}, "
                f"City: {metadata.get('city', 'N/A')}, "
                f"Votes: {metadata.get('votes', 'N/A')}, "
                f"Cost: {metadata.get('cost', 'N/A')}, "
                f"Rating: {metadata.get('rating', 'N/A')}"
                for i, metadata in enumerate(flat_metadatas)
            )
            context_parts.append(f"Query: {prompt}\nResults:\n{context}\nResponse:\n{llm_response}")

        return "\n".join(context_parts)

    def generate_response(self, results, user_prompt):
        """
        Compose the final answer with the LLM and collect image paths.

        :param results: raw ChromaDB result dict from query().
        :param user_prompt: the user's original question.
        :return: (response text, list of image paths for the matched rows).
        """
        recent_context = self.get_recent_context()

        metadatas = results.get('metadatas', [])
        flat_metadatas = [item for sublist in metadatas for item in sublist]

        # Plain-text summary of the matches, fed to the LLM as context.
        recommendation_text = ""
        image_paths = []
        for metadata in flat_metadatas:
            recommendation_text += (
                f"Restaurant: {metadata.get('restaurant_name', 'N/A')}, "
                f"Location: {metadata.get('location', 'N/A')}, "
                f"Locality: {metadata.get('locality', 'N/A')}, "
                f"City: {metadata.get('city', 'N/A')}, "
                f"Votes: {metadata.get('votes', 'N/A')}, "
                f"Cost: {metadata.get('cost', 'N/A')}, "
                f"Rating: {metadata.get('rating', 'N/A')}\n"
            )
            if metadata.get('image_path'):
                image_paths.append(metadata['image_path'])

        prompt_template = ChatPromptTemplate.from_template(
            """
            Previous Conversations:
            {recent_context}

            Here is the data relevant to your query:

            - **Name**: The restaurant's name.
            - **Location, Locality, and City**: The address details of the restaurant.
            - **Cuisine**: Types of cuisines offered by the restaurant.
            - **Rating**: Average rating on a scale of 5.
            - **Votes**: Number of people who have rated the restaurant.
            - **Cost**: Average cost of dining.

            You are a restaurant recommender with knowledge of restaurants, cuisines, ratings, and costs across various cities in India. Respond to queries based on the provided details and recommend the most relevant options based on user preferences. If information is incomplete, provide the best suggestions and encourage follow-up questions.

            Use the recent context only if you require additional information from previous conversations. If the query is not related to the information available to you, make a generic response and let the user know you would be more than happy to help with restaurant recommendations.

            <context>
            {context}
            </context>
            Question: {input}
            """
        )

        prompt = prompt_template.format(recent_context=recent_context, context=recommendation_text, input=user_prompt)
        response = self.llm.invoke(prompt)

        # Remember this turn so follow-up questions can use it.
        self.history.append((user_prompt, results, response.content))

        return response.content, image_paths
|
requirements.txt
ADDED
|
Binary file (652 Bytes). View file
|
|
|
restaurant_uuid_images.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
s3bucket_pathadder.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Augment the restaurant dataset with public S3 image URLs.

Reads Dataset.csv, derives each row's image URL from its Unique_ID, and
writes the result to S3Dataset.csv.
"""
import pandas as pd
import os  # kept: part of the module's existing import surface

# Load the restaurant dataset.
file_path = "D:/Projects/Liminal/AI_Guide/resources/Dataset.csv"
df = pd.read_csv(file_path)

# S3 bucket hosting the per-restaurant images, named <Unique_ID>.png.
bucket_name = "restaurant-resources-liminal"
region = "ap-south-1"
s3_base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/uuid_images/"

# Derive the public URL for every row from its Unique_ID.
# NOTE(review): assumes every uploaded image is a .png — confirm against
# the actual bucket contents before relying on these URLs.
df['S3Image_path'] = df['Unique_ID'].apply(lambda uid: f"{s3_base_url}{uid}.png")

# Persist the augmented dataset.
output_file_path = 'D:/Projects/Liminal/AI_Guide/resources/S3Dataset.csv'
df.to_csv(output_file_path, index=False)

print(f"Updated CSV file saved at: {output_file_path}")
|
webbaseloader.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
import pandas as pd
import os
from dotenv import load_dotenv
import time

# Load environment variables (PLACES_API_KEY) from the local .env file.
load_dotenv()

# Google Places API key; None when the variable is missing — in that case
# the Places requests below will fail with an auth error from Google.
API_KEY = os.getenv('PLACES_API_KEY')

# File paths
input_csv = "D:/Projects/Liminal/AI_Guide/resources/restaurants_1.csv"  # Input CSV file
output_dir = 'D:/Projects/Liminal/AI_Guide/resources/restaurant_images'  # Directory to save images
PHOTO_BASE_URL = "https://maps.googleapis.com/maps/api/place/photo"  # NOTE(review): appears unused — download_photo builds its own URL; confirm before removing
output_csv = "D:/Projects/Liminal/AI_Guide/resources/output.csv"  # Where the augmented CSV is written
# Create output directory if it doesn't exist (runs at import time)
os.makedirs(output_dir, exist_ok=True)
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_place_id(query, api_key):
    """
    Search for a place with the Places Text Search API and return its place_id.

    :param query: Free-text search string (name plus address parts).
    :param api_key: Google Places API key.
    :return: place_id of the first match, or None when nothing is found.
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        'query': query,
        'key': api_key
    }
    # Timeout keeps a stalled connection from hanging the whole batch run.
    response = requests.get(url, params=params, timeout=10)
    data = response.json()

    if data['status'] == 'OK' and data['results']:
        return data['results'][0]['place_id']
    print(f"Place not found for query: {query}")
    return None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_photo_reference(place_id, api_key):
    """
    Fetch the first photo_reference for a place via the Place Details API.

    :param place_id: Google place_id from get_place_id().
    :param api_key: Google Places API key.
    :return: photo_reference string, or None when the place has no photos.
    """
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        'place_id': place_id,
        'key': api_key
    }
    # Timeout keeps a stalled connection from hanging the whole batch run.
    response = requests.get(url, params=params, timeout=10)
    data = response.json()

    if data['status'] == 'OK' and 'photos' in data['result']:
        return data['result']['photos'][0]['photo_reference']
    print(f"No photos found for place_id: {place_id}")
    return None
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def download_photo(photo_reference, api_key, output_path, retries=3, delay=30):
    """
    Download a photo by reference and save it to output_path, with retries.

    :param photo_reference: Photo reference token from the Place Details API.
    :param api_key: Google Places API key.
    :param output_path: Destination file path for the image.
    :param retries: Number of attempts before giving up.
    :param delay: Seconds to sleep between attempts.
    :return: True when the image was saved, False otherwise.
    """
    # Plain literal (the original had a pointless f-prefix here).
    url = "https://maps.googleapis.com/maps/api/place/photo"
    params = {
        'photoreference': photo_reference,
        'key': api_key,
        'maxwidth': 400,  # requested image resolution
        'maxheight': 400
    }

    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, params=params, stream=True, timeout=10)
            if response.status_code == 200:
                # Stream the body to disk in 1 KiB chunks.
                with open(output_path, 'wb') as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
                print(f"Image saved to {output_path}")
                return True
            # Non-200: report and fall through to the retry wait.
            print(f"Failed to download image: {response.status_code} - {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading photo (Attempt {attempt}/{retries}): {e}")

        # Wait before retrying (skipped after the final attempt).
        if attempt < retries:
            print(f"Retrying in {delay} seconds...")
            time.sleep(delay)

    print(f"Failed to download image after {retries} attempts.")
    return False
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def main():
    """
    Main script to process the input CSV, fetch place IDs, photo references, and download images.

    For each restaurant row: build a search query, resolve it to a Google
    place_id, fetch a photo reference, download the photo, and record the
    local image path back into the DataFrame. Rows that fail any step keep
    Image_Path as None. The augmented DataFrame is written to output_csv.
    """
    # Load the CSV file
    df = pd.read_csv(input_csv)

    # Ensure the CSV contains the required columns; abort early otherwise.
    required_columns = ['Name', 'Location', 'Locality', 'City']
    for col in required_columns:
        if col not in df.columns:
            print(f"CSV file must contain the following columns: {', '.join(required_columns)}")
            return

    # Add a new column for image paths (preserved if already present).
    if 'Image_Path' not in df.columns:
        df['Image_Path'] = None

    # Basenames (no extension) of images already downloaded — lets a crashed
    # run be resumed without re-downloading.
    existing_images = {os.path.splitext(f)[0] for f in os.listdir(output_dir) if
                       os.path.isfile(os.path.join(output_dir, f))}

    # Iterate through each row in the CSV
    for index, row in df.iterrows():
        # Filesystem-safe image name: every non-alphanumeric char becomes '_'.
        name = row['Name']
        safe_name = "".join(c if c.isalnum() else "_" for c in name)

        # Skip if the image already exists in the directory
        # (note: skipped rows do NOT get Image_Path filled in).
        if safe_name in existing_images:
            print(f"Image already exists for {row['Name']}, skipping...")
            continue

        # Detailed search query improves the chance of a unique match.
        location = row['Location']
        locality = row['Locality']
        city = row['City']
        query = f"{name}, {location}, {locality}, {city}"

        print(f"Processing: {query}")

        # Step 1: Get place_id
        place_id = get_place_id(query, API_KEY)
        if not place_id:
            continue

        # Step 2: Get photo_reference
        photo_reference = get_photo_reference(place_id, API_KEY)
        if not photo_reference:
            continue

        # Step 3: Download the photo (always saved as .jpg)
        output_path = os.path.join(output_dir, f"{safe_name}.jpg")

        if download_photo(photo_reference, API_KEY, output_path):
            # Update the DataFrame with the image path
            df.at[index, 'Image_Path'] = output_path

    # Save the updated DataFrame to a new CSV
    df.to_csv(output_csv, index=False)
    print(f"Updated CSV saved to {output_csv}")


if __name__ == "__main__":
    main()
|