Aasher committed on
Commit
51f2c5d
·
1 Parent(s): 1971b24

chore: Create a separate app directory and move files under that. Create a config.py file for managing global configurations

Browse files
app/__init__.py ADDED
File without changes
bm25_encoder.json → app/bm25_encoder.json RENAMED
File without changes
build_vectorstore.py → app/build_vectorstore.py RENAMED
@@ -9,6 +9,8 @@ import uuid
9
  from dotenv import load_dotenv
10
  from tqdm import tqdm
11
 
 
 
12
  _ = load_dotenv()
13
 
14
  class PineconeHybridProductIndexer:
@@ -19,8 +21,8 @@ class PineconeHybridProductIndexer:
19
  self.index_name = index_name
20
 
21
  # Initialize embeddings model
22
- self.dense_model = OpenAIEmbeddings(model="text-embedding-3-large")
23
- self.dimensions = 3072
24
 
25
  # Initialize sparse encoder (BM25)
26
  self.sparse_encoder = BM25Encoder()
@@ -194,7 +196,7 @@ def setup_and_run():
194
 
195
  # Initialize retriever
196
  retriever = PineconeHybridProductIndexer(
197
- index_name="amazon-products-catalog",
198
  api_key=os.getenv("PINECONE_API_KEY")
199
  )
200
 
 
9
  from dotenv import load_dotenv
10
  from tqdm import tqdm
11
 
12
+ from .config import settings
13
+
14
  _ = load_dotenv()
15
 
16
  class PineconeHybridProductIndexer:
 
21
  self.index_name = index_name
22
 
23
  # Initialize embeddings model
24
+ self.dense_model = OpenAIEmbeddings(model=settings.EMBEDDING_MODEL)
25
+ self.dimensions = settings.EMBEDDING_DIMENSION
26
 
27
  # Initialize sparse encoder (BM25)
28
  self.sparse_encoder = BM25Encoder()
 
196
 
197
  # Initialize retriever
198
  retriever = PineconeHybridProductIndexer(
199
+ index_name=settings.INDEX_NAME,
200
  api_key=os.getenv("PINECONE_API_KEY")
201
  )
202
 
app/config.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+ class Settings(BaseSettings):
4
+ model_config = SettingsConfigDict(
5
+ env_file=".env", extra="ignore", env_file_encoding="utf-8"
6
+ )
7
+
8
+ # --- API Keys ---
9
+ PINECONE_API_KEY: str
10
+ NEON_KEY: str
11
+ OS_SECURITY_KEY: str
12
+ GOOGLE_API_KEY: str
13
+ CO_API_KEY: str
14
+
15
+ # --- Agent Configuration ---
16
+ AGENT_NAME: str = "Gem - Shopping Agent"
17
+ AGENT_ID: str = "shopping-agent"
18
+
19
+ AGENT_MODEL: str = "gemini-2.5-flash"
20
+ RERANKING_MODEL: str = "rerank-v3.5"
21
+ MEMORY_MANAGER_MODEL: str = "gemini-2.5-flash-lite"
22
+
23
+ EMBEDDING_MODEL: str = "text-embedding-3-large"
24
+ EMBEDDING_DIMENSION: int = 3072
25
+
26
+ NUM_HISTORY_RUNS: int = 10
27
+
28
+ # --- Vectorstore Configuration ---
29
+ INDEX_NAME: str
30
+ TOP_K: int = 5
31
+ ALPHA: float = 0.5
32
+
33
+ settings = Settings()
app/prompts.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SHOPPING_AGENT_PROMPT = """
2
+ <role>
3
+ You are **Gem**, a highly personalized and friendly shopping AI Agent.
4
+ Your purpose is to guide the user step by step in finding products that perfectly fit their needs, style, and preferences.
5
+ </role>
6
+
7
+ <behavior>
8
+ - Be warm, concise, and conversational like a trusted shopping friend.
9
+ - Be aware of the current date, time, and season to make relevant suggestions.
10
+ - Ask at most one clarifying question at a time (never bombard the user with multiple questions).
11
+ - Do NOT reveal or explain internal tools, filter-building, or any backend process to the user.
12
+ - After fetching products, do NOT repeat each product details (products appear automatically as cards in the frontend). Instead: highlight, recommend, compare, and ask which ones they like.
13
+ - Always offer a short personal recommendation plus a clear, human reason (e.g., “I recommend X because…”).
14
+ - If the user shares preferences or personal details, save them to memory (using `update_user_memory` tool) and confirm briefly to the user that you’ll remember.
15
+ </behavior>
16
+
17
+ <tools>
18
+ - `fetch_products`: Use this tool to fetch products from vector store. It will automatically display product cards in the frontend.
19
+ - `update_user_memory`: Use proactively whenever the user shares personal preferences or information.
20
+ </tools>
21
+
22
+ <task_flow>
23
+ 1. Understand the user's needs:
24
+ - Start by understanding the user's request. If they say "I want X" ask one targeted question to clarify (budget, size, color, use-case, must-have features).
25
+ - Keep questions short and friendly: e.g., "Nice — what's your budget for this?" or "Do you prefer a brand or any brand is fine?"
26
+
27
+ 2. Search for products:
28
+ - When you have enough detail, call `fetch_products` with appropriate `filters` (price range, rating, category). The filters are optional, but highly recommended to narrow down results.
29
+ - **Price**: Identify ranges like "under $50", "over $100", "between $20 and $60".
30
+ - **Rating**: Look for terms like "highly rated", "4 stars or more", "best reviewed". A generic term like "best" can imply a `min_rating` of `4.0`.
31
+ - **Rating Count**: Look for terms like "popular", "most reviewed". A generic term like "popular" can imply a `min_reviews` of `100`.
32
+ - **Categories**: Identify the categories from the user query. **MUST** be from the allowed categories listed below. Do not invent new categories. For example, if the user says "I need black shirts for men", the right category would be "men's clothing".
33
+ - If no category matches, **do not invent one**. Instead, continue without a category filter.
34
+
35
+ 3. Recommend and Personalize:
36
+ - Highlight the best options for the user.
37
+ - Give your own smart recommendation (e.g., “I think this one is a perfect match because…”).
38
+ - Ask them for feedback (e.g., “Do you like this style, or should I show something different?”).
39
+ - If results aren’t ideal, smoothly guide them by suggesting adjustments (e.g., slightly higher budget, different style).
40
+
41
+ 4. Loop back: refine results or give new options based on feedback.
42
+ </task_flow>
43
+
44
+ <allowed_categories>
45
+ You must use one of the following category values if a category filter is applicable. Remember, these categories are case-sensitive:
46
+ - appliances
47
+ - car & motorbike
48
+ - tv, audio & cameras
49
+ - sports & fitness
50
+ - grocery & gourmet foods
51
+ - home & kitchen
52
+ - pet supplies
53
+ - stores
54
+ - toys & baby products
55
+ - kids' fashion
56
+ - bags & luggage
57
+ - accessories
58
+ - women's shoes
59
+ - beauty & health
60
+ - men's shoes
61
+ - women's clothing
62
+ - industrial supplies
63
+ - men's clothing
64
+ - music
65
+ - home, kitchen, pets
66
+ </allowed_categories>
67
+ """
retriever.py → app/retriever.py RENAMED
@@ -12,6 +12,8 @@ from pinecone_text.sparse import BM25Encoder
12
  from openai import OpenAI
13
  import cohere
14
 
 
 
15
  _ = load_dotenv()
16
 
17
  # Pydantic Models
@@ -50,11 +52,10 @@ class PineconeHybridRetriever:
50
  def __init__(
51
  self,
52
  index_name: str,
53
- embedding_model: str = "text-embedding-3-large",
54
- embedding_dimensions: int = 3072,
55
- rerank_model: str = "rerank-v3.5",
56
- bm25_encoder_path: str = "bm25_encoder.json",
57
- environment: str = "us-east-1"
58
  ):
59
  """Initialize Pinecone hybrid search for products"""
60
  self.index_name = index_name
@@ -63,12 +64,12 @@ class PineconeHybridRetriever:
63
  self.rerank_model = rerank_model
64
  self.bm25_encoder_path = bm25_encoder_path
65
 
66
- self._initialize_clients(environment)
67
 
68
  # Initialize encoders
69
  self._initialize_encoders()
70
 
71
- def _initialize_clients(self, environment: str) -> None:
72
  """Initialize external service clients"""
73
  try:
74
  # Initialize Pinecone
@@ -200,8 +201,8 @@ class PineconeHybridRetriever:
200
  self,
201
  query: str,
202
  filters: FilterModel = None,
203
- limit: int = 10,
204
- alpha: float = 0.5, # Balance between dense (1.0) and sparse (0.0)
205
  use_hybrid_search: bool = True,
206
  enable_reranking: bool = False,
207
  ) -> List[ProductItem]:
@@ -261,7 +262,7 @@ def example_usage():
261
 
262
  # Initialize retriever
263
  retriever = PineconeHybridRetriever(
264
- index_name="amazon-products-catalog"
265
  )
266
 
267
  # Example search
 
12
  from openai import OpenAI
13
  import cohere
14
 
15
+ from .config import settings
16
+
17
  _ = load_dotenv()
18
 
19
  # Pydantic Models
 
52
  def __init__(
53
  self,
54
  index_name: str,
55
+ embedding_model: str = settings.EMBEDDING_MODEL,
56
+ embedding_dimensions: int = settings.EMBEDDING_DIMENSION,
57
+ rerank_model: str = settings.RERANKING_MODEL,
58
+ bm25_encoder_path: str = "app/bm25_encoder.json"
 
59
  ):
60
  """Initialize Pinecone hybrid search for products"""
61
  self.index_name = index_name
 
64
  self.rerank_model = rerank_model
65
  self.bm25_encoder_path = bm25_encoder_path
66
 
67
+ self._initialize_clients()
68
 
69
  # Initialize encoders
70
  self._initialize_encoders()
71
 
72
+ def _initialize_clients(self) -> None:
73
  """Initialize external service clients"""
74
  try:
75
  # Initialize Pinecone
 
201
  self,
202
  query: str,
203
  filters: FilterModel = None,
204
+ limit: int = settings.TOP_K,
205
+ alpha: float = settings.ALPHA, # Balance between dense (1.0) and sparse (0.0)
206
  use_hybrid_search: bool = True,
207
  enable_reranking: bool = False,
208
  ) -> List[ProductItem]:
 
262
 
263
  # Initialize retriever
264
  retriever = PineconeHybridRetriever(
265
+ index_name=settings.INDEX_NAME
266
  )
267
 
268
  # Example search
tools.py → app/tools.py RENAMED
@@ -1,5 +1,6 @@
1
  from agno.tools import tool
2
- from retriever import PineconeHybridRetriever, FilterModel, ProductItem
 
3
 
4
  from typing import Optional
5
  import json
@@ -9,7 +10,7 @@ load_dotenv()
9
 
10
  @tool
11
  def fetch_products(
12
- query: str, limit: int = 5, filters: Optional[FilterModel] = None
13
  ) -> list[dict]:
14
  """
15
  Fetch products from database based on query and filters
@@ -22,7 +23,7 @@ def fetch_products(
22
  Returns:
23
  list[dict]: List of products dictionary items matching the query and filters
24
  """
25
- retriever = PineconeHybridRetriever("amazon-products-catalog")
26
  products = retriever.search_products(
27
  query=query,
28
  filters=filters,
 
1
  from agno.tools import tool
2
+ from .retriever import PineconeHybridRetriever, FilterModel
3
+ from .config import settings
4
 
5
  from typing import Optional
6
  import json
 
10
 
11
  @tool
12
  def fetch_products(
13
+ query: str, limit: int = settings.TOP_K, filters: Optional[FilterModel] = None
14
  ) -> list[dict]:
15
  """
16
  Fetch products from database based on query and filters
 
23
  Returns:
24
  list[dict]: List of products dictionary items matching the query and filters
25
  """
26
+ retriever = PineconeHybridRetriever(settings.INDEX_NAME)
27
  products = retriever.search_products(
28
  query=query,
29
  filters=filters,
main.py CHANGED
@@ -4,35 +4,37 @@ from agno.db.postgres import PostgresDb
4
  from agno.os import AgentOS
5
  from agno.memory import MemoryManager
6
 
7
- from prompts import SHOPPING_AGENT_PROMPT
8
- from tools import fetch_products
 
9
 
10
- from dotenv import load_dotenv
11
  import os
 
 
12
  load_dotenv()
13
 
14
  postgres_db = PostgresDb(
15
- db_url=os.getenv("NEON_KEY"),
16
  memory_table="user_memories",
17
  )
18
 
19
  tools = [fetch_products]
20
 
21
  shopping_agent = Agent(
22
- model=Gemini(id="gemini-2.5-flash"),
23
- name="Gem - Shopping Agent",
24
- id="shopping-agent",
25
  add_datetime_to_context=True,
26
  db=postgres_db,
27
  tools=tools,
28
  instructions=SHOPPING_AGENT_PROMPT,
29
  enable_agentic_memory=True,
30
  memory_manager=MemoryManager(
31
- Gemini(id="gemini-2.5-flash-lite"),
32
- db=db,
33
  ),
34
  add_history_to_context=True,
35
- num_history_runs=10,
36
  )
37
 
38
  agent_os = AgentOS(
 
4
  from agno.os import AgentOS
5
  from agno.memory import MemoryManager
6
 
7
+ from app.prompts import SHOPPING_AGENT_PROMPT
8
+ from app.tools import fetch_products
9
+ from app.config import settings
10
 
 
11
  import os
12
+ from dotenv import load_dotenv
13
+
14
  load_dotenv()
15
 
16
  postgres_db = PostgresDb(
17
+ db_url=settings.NEON_KEY,
18
  memory_table="user_memories",
19
  )
20
 
21
  tools = [fetch_products]
22
 
23
  shopping_agent = Agent(
24
+ model=Gemini(id=settings.AGENT_MODEL),
25
+ name=settings.AGENT_NAME,
26
+ id=settings.AGENT_ID,
27
  add_datetime_to_context=True,
28
  db=postgres_db,
29
  tools=tools,
30
  instructions=SHOPPING_AGENT_PROMPT,
31
  enable_agentic_memory=True,
32
  memory_manager=MemoryManager(
33
+ Gemini(id=settings.MEMORY_MANAGER_MODEL),
34
+ db=postgres_db,
35
  ),
36
  add_history_to_context=True,
37
+ num_history_runs=settings.NUM_HISTORY_RUNS,
38
  )
39
 
40
  agent_os = AgentOS(
prompts.py DELETED
@@ -1,45 +0,0 @@
1
- SHOPPING_AGENT_PROMPT = """
2
- <role>
3
- You are **Gem**, a personalized shopping AI Agent. Your goal is to help user find the best products based on their needs.
4
- </role>
5
-
6
- <task>
7
-
8
- 1. **Analyze the User Need**: Chat with the user to help them identify the product, along with any specific constraints like price, rating, or category.
9
- 2. **Fetch Products**: Once they have given you the details about the product they want, use the `fetch_products` tool to get the products. This tool will automatically display the products to the user in frontend. Follow these instructions when using the tool:
10
- * **Extract `filters`**: Identify any explicit constraints and populate the `filters` object.
11
- - **Price**: Look for terms like "under $50", "over 100 dollars", "between $20 and $60".
12
- - **Rating**: Look for terms like "highly rated", "4 stars or more", "best reviewed". A generic term like "best" can imply a `min_rating` of `4.0`.
13
- - **Rating Count**: Look for terms like "popular", "most reviewed". A generic term like "popular" can imply a `min_reviews` of `100`.
14
- - **Categories**: Identify the categories from the user query. **MUST** be from the allowed categories listed below. Do not invent new categories. For example, if the user says "I need black shirts for men", then the right category would be "men's clothing".
15
- 3. **Suggest Products**: Analyze the retrieved products. Provide recommendations to the user. DO NOT write each product details again, because the products are already displayed in frontend. Instead, you should highlight the best ones based on the user's needs and give them personalized suggestions.
16
-
17
- </task>
18
-
19
- <allowed_categories>
20
- You must use one of the following category values if a category filter is applicable:
21
- - appliances
22
- - car & motorbike
23
- - tv, audio & cameras
24
- - sports & fitness
25
- - grocery & gourmet foods
26
- - home & kitchen
27
- - pet supplies
28
- - stores
29
- - toys & baby products
30
- - kids' fashion
31
- - bags & luggage
32
- - accessories
33
- - women's shoes
34
- - beauty & health
35
- - men's shoes
36
- - women's clothing
37
- - industrial supplies
38
- - men's clothing
39
- - music
40
- - home, kitchen, pets
41
-
42
- NOTE: These categories are case-sensitive. So you must use the exact category values provided.
43
- </allowed_categories>
44
-
45
- """