Spaces:
Sleeping
Sleeping
chore: Create a separate app directory and move files under that. Create a config.py file for managing global configurations
Browse files
- app/__init__.py +0 -0
- bm25_encoder.json → app/bm25_encoder.json +0 -0
- build_vectorstore.py → app/build_vectorstore.py +5 -3
- app/config.py +33 -0
- app/prompts.py +67 -0
- retriever.py → app/retriever.py +11 -10
- tools.py → app/tools.py +4 -3
- main.py +12 -10
- prompts.py +0 -45
app/__init__.py
ADDED
|
File without changes
|
bm25_encoder.json → app/bm25_encoder.json
RENAMED
|
File without changes
|
build_vectorstore.py → app/build_vectorstore.py
RENAMED
|
@@ -9,6 +9,8 @@ import uuid
|
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
|
|
|
|
|
|
| 12 |
_ = load_dotenv()
|
| 13 |
|
| 14 |
class PineconeHybridProductIndexer:
|
|
@@ -19,8 +21,8 @@ class PineconeHybridProductIndexer:
|
|
| 19 |
self.index_name = index_name
|
| 20 |
|
| 21 |
# Initialize embeddings model
|
| 22 |
-
self.dense_model = OpenAIEmbeddings(model=
|
| 23 |
-
self.dimensions =
|
| 24 |
|
| 25 |
# Initialize sparse encoder (BM25)
|
| 26 |
self.sparse_encoder = BM25Encoder()
|
|
@@ -194,7 +196,7 @@ def setup_and_run():
|
|
| 194 |
|
| 195 |
# Initialize retriever
|
| 196 |
retriever = PineconeHybridProductIndexer(
|
| 197 |
-
index_name=
|
| 198 |
api_key=os.getenv("PINECONE_API_KEY")
|
| 199 |
)
|
| 200 |
|
|
|
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
| 12 |
+
from .config import settings
|
| 13 |
+
|
| 14 |
_ = load_dotenv()
|
| 15 |
|
| 16 |
class PineconeHybridProductIndexer:
|
|
|
|
| 21 |
self.index_name = index_name
|
| 22 |
|
| 23 |
# Initialize embeddings model
|
| 24 |
+
self.dense_model = OpenAIEmbeddings(model=settings.EMBEDDING_MODEL)
|
| 25 |
+
self.dimensions = settings.EMBEDDING_DIMENSION
|
| 26 |
|
| 27 |
# Initialize sparse encoder (BM25)
|
| 28 |
self.sparse_encoder = BM25Encoder()
|
|
|
|
| 196 |
|
| 197 |
# Initialize retriever
|
| 198 |
retriever = PineconeHybridProductIndexer(
|
| 199 |
+
index_name=settings.INDEX_NAME,
|
| 200 |
api_key=os.getenv("PINECONE_API_KEY")
|
| 201 |
)
|
| 202 |
|
app/config.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 2 |
+
|
| 3 |
+
class Settings(BaseSettings):
|
| 4 |
+
model_config = SettingsConfigDict(
|
| 5 |
+
env_file=".env", extra="ignore", env_file_encoding="utf-8"
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
# --- API Keys ---
|
| 9 |
+
PINECONE_API_KEY: str
|
| 10 |
+
NEON_KEY: str
|
| 11 |
+
OS_SECURITY_KEY: str
|
| 12 |
+
GOOGLE_API_KEY: str
|
| 13 |
+
CO_API_KEY: str
|
| 14 |
+
|
| 15 |
+
# --- Agent Configuration ---
|
| 16 |
+
AGENT_NAME: str = "Gem - Shopping Agent"
|
| 17 |
+
AGENT_ID: str = "shopping-agent"
|
| 18 |
+
|
| 19 |
+
AGENT_MODEL: str = "gemini-2.5-flash"
|
| 20 |
+
RERANKING_MODEL: str = "rerank-v3.5"
|
| 21 |
+
MEMORY_MANAGER_MODEL: str = "gemini-2.5-flash-lite"
|
| 22 |
+
|
| 23 |
+
EMBEDDING_MODEL: str = "text-embedding-3-large"
|
| 24 |
+
EMBEDDING_DIMENSION: int = 3072
|
| 25 |
+
|
| 26 |
+
NUM_HISTORY_RUNS: int = 10
|
| 27 |
+
|
| 28 |
+
# --- Vectorstore Configuration ---
|
| 29 |
+
INDEX_NAME: str
|
| 30 |
+
TOP_K: int = 5
|
| 31 |
+
ALPHA: float = 0.5
|
| 32 |
+
|
| 33 |
+
settings = Settings()
|
app/prompts.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SHOPPING_AGENT_PROMPT = """
|
| 2 |
+
<role>
|
| 3 |
+
You are **Gem**, a highly personalized and friendly shopping AI Agent.
|
| 4 |
+
Your purpose is to guide the user step by step in finding products that perfectly fit their needs, style, and preferences.
|
| 5 |
+
</role>
|
| 6 |
+
|
| 7 |
+
<behavior>
|
| 8 |
+
- Be warm, concise, and conversational like a trusted shopping friend.
|
| 9 |
+
- Be aware of the current date, time, and season to make relevant suggestions.
|
| 10 |
+
- Ask at most one clarifying question at a time (never bombard the user with multiple questions).
|
| 11 |
+
- Do NOT reveal or explain internal tools, filter-building, or any backend process to the user.
|
| 12 |
+
- After fetching products, do NOT repeat each product details (products appear automatically as cards in the frontend). Instead: highlight, recommend, compare, and ask which ones they like.
|
| 13 |
+
- Always offer a short personal recommendation plus a clear, human reason (e.g., “I recommend X because…”).
|
| 14 |
+
- If the user shares preferences or personal details, save them to memory (using `update_user_memory` tool) and confirm briefly to the user that you’ll remember.
|
| 15 |
+
</behavior>
|
| 16 |
+
|
| 17 |
+
<tools>
|
| 18 |
+
- `fetch_products`: Use this tool to fetch products from vector store. It will automatically display product cards in the frontend.
|
| 19 |
+
- `update_user_memory`: Use proactively whenever the user shares personal preferences or information.
|
| 20 |
+
</tools>
|
| 21 |
+
|
| 22 |
+
<task_flow>
|
| 23 |
+
1. Understand the user's needs:
|
| 24 |
+
- Start by understanding the user's request. If they say "I want X" ask one targeted question to clarify (budget, size, color, use-case, must-have features).
|
| 25 |
+
- Keep questions short and friendly: e.g., "Nice — what's your budget for this?" or "Do you prefer a brand or any brand is fine?"
|
| 26 |
+
|
| 27 |
+
2. Search for products:
|
| 28 |
+
- When you have enough detail, call `fetch_products` with appropriate `filters` (price range, rating, category). The filters are optional, but highly recommended to narrow down results.
|
| 29 |
+
- **Price**: Identify ranges like "under $50", "over $100", "between $20 and $60".
|
| 30 |
+
- **Rating**: Look for terms like "highly rated", "4 stars or more", "best reviewed". A generic term like "best" can imply a `min_rating` of `4.0`.
|
| 31 |
+
- **Rating Count**: Look for terms like "popular", "most reviewed". A generic term like "popular" can imply a `min_reviews` of `100`.
|
| 32 |
+
- **Categories**: Identify the categories from the user query. **MUST** be from the allowed categories listed below. Do not invent new categories. For example, if the user says "I need black shirts for men", the right category would be "men's clothing".
|
| 33 |
+
- If no category matches, **do not invent one**. Instead, continue without a category filter.
|
| 34 |
+
|
| 35 |
+
3. Recommend and Personalize:
|
| 36 |
+
- Highlight the best options for the user.
|
| 37 |
+
- Give your own smart recommendation (e.g., “I think this one is a perfect match because…”).
|
| 38 |
+
- Ask them for feedback (e.g., “Do you like this style, or should I show something different?”).
|
| 39 |
+
- If results aren’t ideal, smoothly guide them by suggesting adjustments (e.g., slightly higher budget, different style).
|
| 40 |
+
|
| 41 |
+
4. Loop back: refine results or give new options based on feedback.
|
| 42 |
+
</task_flow>
|
| 43 |
+
|
| 44 |
+
<allowed_categories>
|
| 45 |
+
You must use one of the following category values if a category filter is applicable. Remember, these categories are case-sensitive:
|
| 46 |
+
- appliances
|
| 47 |
+
- car & motorbike
|
| 48 |
+
- tv, audio & cameras
|
| 49 |
+
- sports & fitness
|
| 50 |
+
- grocery & gourmet foods
|
| 51 |
+
- home & kitchen
|
| 52 |
+
- pet supplies
|
| 53 |
+
- stores
|
| 54 |
+
- toys & baby products
|
| 55 |
+
- kids' fashion
|
| 56 |
+
- bags & luggage
|
| 57 |
+
- accessories
|
| 58 |
+
- women's shoes
|
| 59 |
+
- beauty & health
|
| 60 |
+
- men's shoes
|
| 61 |
+
- women's clothing
|
| 62 |
+
- industrial supplies
|
| 63 |
+
- men's clothing
|
| 64 |
+
- music
|
| 65 |
+
- home, kitchen, pets
|
| 66 |
+
</allowed_categories>
|
| 67 |
+
"""
|
retriever.py → app/retriever.py
RENAMED
|
@@ -12,6 +12,8 @@ from pinecone_text.sparse import BM25Encoder
|
|
| 12 |
from openai import OpenAI
|
| 13 |
import cohere
|
| 14 |
|
|
|
|
|
|
|
| 15 |
_ = load_dotenv()
|
| 16 |
|
| 17 |
# Pydantic Models
|
|
@@ -50,11 +52,10 @@ class PineconeHybridRetriever:
|
|
| 50 |
def __init__(
|
| 51 |
self,
|
| 52 |
index_name: str,
|
| 53 |
-
embedding_model: str =
|
| 54 |
-
embedding_dimensions: int =
|
| 55 |
-
rerank_model: str =
|
| 56 |
-
bm25_encoder_path: str = "bm25_encoder.json"
|
| 57 |
-
environment: str = "us-east-1"
|
| 58 |
):
|
| 59 |
"""Initialize Pinecone hybrid search for products"""
|
| 60 |
self.index_name = index_name
|
|
@@ -63,12 +64,12 @@ class PineconeHybridRetriever:
|
|
| 63 |
self.rerank_model = rerank_model
|
| 64 |
self.bm25_encoder_path = bm25_encoder_path
|
| 65 |
|
| 66 |
-
self._initialize_clients(
|
| 67 |
|
| 68 |
# Initialize encoders
|
| 69 |
self._initialize_encoders()
|
| 70 |
|
| 71 |
-
def _initialize_clients(self
|
| 72 |
"""Initialize external service clients"""
|
| 73 |
try:
|
| 74 |
# Initialize Pinecone
|
|
@@ -200,8 +201,8 @@ class PineconeHybridRetriever:
|
|
| 200 |
self,
|
| 201 |
query: str,
|
| 202 |
filters: FilterModel = None,
|
| 203 |
-
limit: int =
|
| 204 |
-
alpha: float =
|
| 205 |
use_hybrid_search: bool = True,
|
| 206 |
enable_reranking: bool = False,
|
| 207 |
) -> List[ProductItem]:
|
|
@@ -261,7 +262,7 @@ def example_usage():
|
|
| 261 |
|
| 262 |
# Initialize retriever
|
| 263 |
retriever = PineconeHybridRetriever(
|
| 264 |
-
index_name=
|
| 265 |
)
|
| 266 |
|
| 267 |
# Example search
|
|
|
|
| 12 |
from openai import OpenAI
|
| 13 |
import cohere
|
| 14 |
|
| 15 |
+
from .config import settings
|
| 16 |
+
|
| 17 |
_ = load_dotenv()
|
| 18 |
|
| 19 |
# Pydantic Models
|
|
|
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
index_name: str,
|
| 55 |
+
embedding_model: str = settings.EMBEDDING_MODEL,
|
| 56 |
+
embedding_dimensions: int = settings.EMBEDDING_DIMENSION,
|
| 57 |
+
rerank_model: str = settings.RERANKING_MODEL,
|
| 58 |
+
bm25_encoder_path: str = "app/bm25_encoder.json"
|
|
|
|
| 59 |
):
|
| 60 |
"""Initialize Pinecone hybrid search for products"""
|
| 61 |
self.index_name = index_name
|
|
|
|
| 64 |
self.rerank_model = rerank_model
|
| 65 |
self.bm25_encoder_path = bm25_encoder_path
|
| 66 |
|
| 67 |
+
self._initialize_clients()
|
| 68 |
|
| 69 |
# Initialize encoders
|
| 70 |
self._initialize_encoders()
|
| 71 |
|
| 72 |
+
def _initialize_clients(self) -> None:
|
| 73 |
"""Initialize external service clients"""
|
| 74 |
try:
|
| 75 |
# Initialize Pinecone
|
|
|
|
| 201 |
self,
|
| 202 |
query: str,
|
| 203 |
filters: FilterModel = None,
|
| 204 |
+
limit: int = settings.TOP_K,
|
| 205 |
+
alpha: float = settings.ALPHA, # Balance between dense (1.0) and sparse (0.0)
|
| 206 |
use_hybrid_search: bool = True,
|
| 207 |
enable_reranking: bool = False,
|
| 208 |
) -> List[ProductItem]:
|
|
|
|
| 262 |
|
| 263 |
# Initialize retriever
|
| 264 |
retriever = PineconeHybridRetriever(
|
| 265 |
+
index_name=settings.INDEX_NAME
|
| 266 |
)
|
| 267 |
|
| 268 |
# Example search
|
tools.py → app/tools.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from agno.tools import tool
|
| 2 |
-
from retriever import PineconeHybridRetriever, FilterModel
|
|
|
|
| 3 |
|
| 4 |
from typing import Optional
|
| 5 |
import json
|
|
@@ -9,7 +10,7 @@ load_dotenv()
|
|
| 9 |
|
| 10 |
@tool
|
| 11 |
def fetch_products(
|
| 12 |
-
query: str, limit: int =
|
| 13 |
) -> list[dict]:
|
| 14 |
"""
|
| 15 |
Fetch products from database based on query and filters
|
|
@@ -22,7 +23,7 @@ def fetch_products(
|
|
| 22 |
Returns:
|
| 23 |
list[dict]: List of products dictionary items matching the query and filters
|
| 24 |
"""
|
| 25 |
-
retriever = PineconeHybridRetriever(
|
| 26 |
products = retriever.search_products(
|
| 27 |
query=query,
|
| 28 |
filters=filters,
|
|
|
|
| 1 |
from agno.tools import tool
|
| 2 |
+
from .retriever import PineconeHybridRetriever, FilterModel
|
| 3 |
+
from .config import settings
|
| 4 |
|
| 5 |
from typing import Optional
|
| 6 |
import json
|
|
|
|
| 10 |
|
| 11 |
@tool
|
| 12 |
def fetch_products(
|
| 13 |
+
query: str, limit: int = settings.TOP_K, filters: Optional[FilterModel] = None
|
| 14 |
) -> list[dict]:
|
| 15 |
"""
|
| 16 |
Fetch products from database based on query and filters
|
|
|
|
| 23 |
Returns:
|
| 24 |
list[dict]: List of products dictionary items matching the query and filters
|
| 25 |
"""
|
| 26 |
+
retriever = PineconeHybridRetriever(settings.INDEX_NAME)
|
| 27 |
products = retriever.search_products(
|
| 28 |
query=query,
|
| 29 |
filters=filters,
|
main.py
CHANGED
|
@@ -4,35 +4,37 @@ from agno.db.postgres import PostgresDb
|
|
| 4 |
from agno.os import AgentOS
|
| 5 |
from agno.memory import MemoryManager
|
| 6 |
|
| 7 |
-
from prompts import SHOPPING_AGENT_PROMPT
|
| 8 |
-
from tools import fetch_products
|
|
|
|
| 9 |
|
| 10 |
-
from dotenv import load_dotenv
|
| 11 |
import os
|
|
|
|
|
|
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
postgres_db = PostgresDb(
|
| 15 |
-
db_url=
|
| 16 |
memory_table="user_memories",
|
| 17 |
)
|
| 18 |
|
| 19 |
tools = [fetch_products]
|
| 20 |
|
| 21 |
shopping_agent = Agent(
|
| 22 |
-
model=Gemini(id=
|
| 23 |
-
name=
|
| 24 |
-
id=
|
| 25 |
add_datetime_to_context=True,
|
| 26 |
db=postgres_db,
|
| 27 |
tools=tools,
|
| 28 |
instructions=SHOPPING_AGENT_PROMPT,
|
| 29 |
enable_agentic_memory=True,
|
| 30 |
memory_manager=MemoryManager(
|
| 31 |
-
Gemini(id=
|
| 32 |
-
db=
|
| 33 |
),
|
| 34 |
add_history_to_context=True,
|
| 35 |
-
num_history_runs=
|
| 36 |
)
|
| 37 |
|
| 38 |
agent_os = AgentOS(
|
|
|
|
| 4 |
from agno.os import AgentOS
|
| 5 |
from agno.memory import MemoryManager
|
| 6 |
|
| 7 |
+
from app.prompts import SHOPPING_AGENT_PROMPT
|
| 8 |
+
from app.tools import fetch_products
|
| 9 |
+
from app.config import settings
|
| 10 |
|
|
|
|
| 11 |
import os
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
postgres_db = PostgresDb(
|
| 17 |
+
db_url=settings.NEON_KEY,
|
| 18 |
memory_table="user_memories",
|
| 19 |
)
|
| 20 |
|
| 21 |
tools = [fetch_products]
|
| 22 |
|
| 23 |
shopping_agent = Agent(
|
| 24 |
+
model=Gemini(id=settings.AGENT_MODEL),
|
| 25 |
+
name=settings.AGENT_NAME,
|
| 26 |
+
id=settings.AGENT_ID,
|
| 27 |
add_datetime_to_context=True,
|
| 28 |
db=postgres_db,
|
| 29 |
tools=tools,
|
| 30 |
instructions=SHOPPING_AGENT_PROMPT,
|
| 31 |
enable_agentic_memory=True,
|
| 32 |
memory_manager=MemoryManager(
|
| 33 |
+
Gemini(id=settings.MEMORY_MANAGER_MODEL),
|
| 34 |
+
db=postgres_db,
|
| 35 |
),
|
| 36 |
add_history_to_context=True,
|
| 37 |
+
num_history_runs=settings.NUM_HISTORY_RUNS,
|
| 38 |
)
|
| 39 |
|
| 40 |
agent_os = AgentOS(
|
prompts.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
SHOPPING_AGENT_PROMPT = """
|
| 2 |
-
<role>
|
| 3 |
-
You are **Gem**, a personalized shopping AI Agent. Your goal is to help user find the best products based on their needs.
|
| 4 |
-
</role>
|
| 5 |
-
|
| 6 |
-
<task>
|
| 7 |
-
|
| 8 |
-
1. **Analyze the User Need**: Chat with the user to help them identify the product, along with any specific constraints like price, rating, or category.
|
| 9 |
-
2. **Fetch Products**: Once they have given you the details about the product they want, use the `fetch_products` tool to get the products. This tool will automatically display the products to the user in frontend. Follow these instructions when using the tool:
|
| 10 |
-
* **Extract `filters`**: Identify any explicit constraints and populate the `filters` object.
|
| 11 |
-
- **Price**: Look for terms like "under $50", "over 100 dollars", "between $20 and $60".
|
| 12 |
-
- **Rating**: Look for terms like "highly rated", "4 stars or more", "best reviewed". A generic term like "best" can imply a `min_rating` of `4.0`.
|
| 13 |
-
- **Rating Count**: Look for terms like "popular", "most reviewed". A generic term like "popular" can imply a `min_reviews` of `100`.
|
| 14 |
-
- **Categories**: Identify the categories from the user query. **MUST** be from the allowed categories listed below. Do not invent new categories. For example, if the user says "I need black shirts for men", then the right category would be "men's clothing".
|
| 15 |
-
3. **Suggest Products**: Analyze the retrieved products. Provide recommendations to the user. DO NOT write each product details again, because the products are already displayed in frontend. Instead, you should highlight the best ones based on the user's needs and give them personalized suggestions.
|
| 16 |
-
|
| 17 |
-
</task>
|
| 18 |
-
|
| 19 |
-
<allowed_categories>
|
| 20 |
-
You must use one of the following category values if a category filter is applicable:
|
| 21 |
-
- appliances
|
| 22 |
-
- car & motorbike
|
| 23 |
-
- tv, audio & cameras
|
| 24 |
-
- sports & fitness
|
| 25 |
-
- grocery & gourmet foods
|
| 26 |
-
- home & kitchen
|
| 27 |
-
- pet supplies
|
| 28 |
-
- stores
|
| 29 |
-
- toys & baby products
|
| 30 |
-
- kids' fashion
|
| 31 |
-
- bags & luggage
|
| 32 |
-
- accessories
|
| 33 |
-
- women's shoes
|
| 34 |
-
- beauty & health
|
| 35 |
-
- men's shoes
|
| 36 |
-
- women's clothing
|
| 37 |
-
- industrial supplies
|
| 38 |
-
- men's clothing
|
| 39 |
-
- music
|
| 40 |
-
- home, kitchen, pets
|
| 41 |
-
|
| 42 |
-
NOTE: These categories are case-sensitive. So you must use the exact category values provided.
|
| 43 |
-
</allowed_categories>
|
| 44 |
-
|
| 45 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|