|
|
from data import call_genai_embedding_api,debug_print,all_card_names,features,card_descriptions,llm1
|
|
|
from nodes.intent import get_pretty_state_string,CreditCardState
|
|
|
import faiss
|
|
|
import numpy as np
|
|
|
import json
|
|
|
import re
|
|
|
from langchain_core.messages import SystemMessage, HumanMessage
|
|
|
|
|
|
|
|
|
# Embedding model identifier; get_gemini_embeddings passes it to
# call_genai_embedding_api for every text it embeds.
model_name = "models/text-embedding-004"
|
|
|
|
|
|
def chunk_text(text, chunk_size=1):
    """Split a ``"; "``-delimited description into chunks.

    Args:
        text: Description whose individual statements are separated by
            ``"; "``.
        chunk_size: Number of consecutive statements joined (again with
            ``"; "``) into each chunk.

    Returns:
        A list of chunk strings in source order. Empty or whitespace-only
        statements are skipped, so an empty ``text`` yields ``[]`` rather
        than a single empty chunk.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Drop blank pieces (empty input, trailing "; ") so callers never receive
    # an empty chunk to embed.
    sentences = [part for part in text.split("; ") if part.strip()]
    return [
        "; ".join(sentences[start:start + chunk_size])
        for start in range(0, len(sentences), chunk_size)
    ]
|
|
|
|
|
|
def get_gemini_embeddings(texts):
    """Embed every text and stack the vectors into one float32 matrix.

    Each text is sent individually, in order, to
    ``call_genai_embedding_api`` with ``task_type="RETRIEVAL_DOCUMENT"`` and
    the module-level ``model_name``.

    Args:
        texts: Iterable of strings to embed.

    Returns:
        The per-text vectors stacked with ``np.vstack`` (dtype float32), one
        row per input text in input order.

    Raises:
        ValueError: Raised by ``np.vstack`` when ``texts`` is empty.
    """
    # NOTE(review): model_name is passed both positionally and as ``model=``;
    # presumably the helper takes a leading name and forwards the keyword
    # arguments to the embedding API -- confirm against its signature.
    vectors = [
        np.array(
            call_genai_embedding_api(
                model_name,
                model=model_name,
                content=text,
                task_type="RETRIEVAL_DOCUMENT",
            )["embedding"],
            dtype=np.float32,
        )
        for text in texts
    ]
    return np.vstack(vectors)
|
|
|
|
|
|
# Build the retrieval corpus at import time.
#   chunk_texts        -- every chunk of every card description, in order.
#   chunk_name_mapping -- chunk position in chunk_texts -> owning card name.
chunk_texts, chunk_name_mapping = [], {}

for card, desc in card_descriptions.items():
    for chunk in chunk_text(desc):
        # The chunk's position in chunk_texts doubles as its row index in
        # chunk_embeddings, so record the owner under that index.
        idx = len(chunk_texts)
        chunk_texts.append(chunk)
        chunk_name_mapping[idx] = card

# One embedding row per chunk; rows are L2-normalised in place by FAISS
# (presumably so inner-product search behaves like cosine similarity --
# confirm against the index built elsewhere).
chunk_embeddings = get_gemini_embeddings(chunk_texts)
faiss.normalize_L2(chunk_embeddings)

print(f" Prepared {len(chunk_texts)} chunks and embeddings.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def query_refiner_node(state: CreditCardState) -> CreditCardState:
    """Refine the user's query with the LLM and derive retrieval sub-queries.

    Reads ``state["query"]`` and ``state["preferences"]`` and makes two calls
    through ``llm1``:

    1. Rewrite the query into an optimized search query and extract the cards
       the user says they already own (expected back as a JSON object).
    2. Expand the optimized query into sub-queries, one per output line.

    State keys written:
        raw_query: The query exactly as it arrived in ``state["query"]``.
        query: Replaced by the optimized query when step 1 succeeds;
            left untouched otherwise.
        excluded_cards: Cards reported by step 1; ``[]`` when step 1 fails.
        multi_queries: Sub-queries from step 2; ``[]`` when either step fails.

    LLM and parsing failures are printed rather than raised. A failure in
    step 2 keeps the results of step 1.

    Args:
        state: Graph state for the current request.

    Returns:
        The same ``state`` object, updated in place.
    """
    debug_print("NODE", f"Entered query_refiner_node with state:\n {get_pretty_state_string(state)}\n")

    user_query = state.get("query", "")
    state["raw_query"] = user_query
    preferences = state.get("preferences", [])

    preferences_text = ""
    if preferences:
        preferences_text = "User selected preferences: " + ", ".join(preferences) + "."

    # NOTE(review): the prompt asks the model for excluded cards but does not
    # list the known card names (all_card_names) -- confirm whether it should.
    # The prompt text is kept flush-left so that no source indentation leaks
    # into the string sent to the model.
    prompt = f"""
You are an AI assistant that refines user queries to make them optimized for credit card information retrieval while strictly preserving the user's original intent.
Your task is to restructure queries in a way that enables accurate and relevant credit card recommendations.

Instructions:
- Identify the main intent or feature of the query (e.g., cashback, lounge access, air miles).
- Format the query clearly and concisely, with the primary benefit mentioned first.
- Use direct and specific keywords that match real credit card benefits.
- Respect and incorporate any stated user preferences.
- Do NOT introduce any benefits not explicitly requested by the user.
- Retain and highlight any mention of a specific bank or card brand if provided.
- If the user mentions "beginner", "entry-level", or "low credit score", focus on basic features like cashback, reward points, and easy approvals.

**Card Exclusions:**
- Only identify cards for exclusion if the user clearly states they already have them.
- Do not guess or assume exclusions based on features.

**Examples:**

Example 1
User Query: "I drive a lot for work and want a credit card with good fuel rewards and travel perks."
Output:
{{
"optimized_query": "Category: Fuel Rewards | Best credit cards for high fuel spending with maximum rewards & fuel surcharge waiver. Travel perks preferred but secondary.",
"excluded_cards": []
}}

Example 2
User Query: "I already have a card with airline miles. Are there better options for frequent flyers?"
Output:
{{
"optimized_query": "Category: Travel Benefits | Credit cards with premium travel perks like airport lounge access, hotel discounts, and concierge services. Avoid repeating airline mile features already covered.",
"excluded_cards": []
}}

The following is the user's query. Please provide a JSON response.

Preferences: "{preferences_text}"
User Query: "{user_query}"

Output:
"""

    print("Rewriting user query for optimization...")

    # ---- Step 1: refine the query and extract exclusions -------------------
    try:
        query_refine_messages = [
            SystemMessage(content="You are an AI assistant that refines user queries to make them optimized for credit card information retrieval while strictly preserving the user's original intent."),
            HumanMessage(content=prompt),
        ]

        # NOTE(review): LangChain's ``config`` argument is a RunnableConfig;
        # confirm that llm1 actually honours max_tokens/temperature when they
        # are supplied this way.
        response_obj = await llm1.ainvoke(
            query_refine_messages,
            config={"max_tokens": 512, "temperature": 0.0},
        )

        # The model may wrap its JSON in a Markdown code fence; strip the
        # fence markers before parsing.
        clean_text = re.sub(r"^```json|```$", "", response_obj.content.strip(), flags=re.MULTILINE).strip()
        result = json.loads(clean_text)

        optimized_query = result["optimized_query"]
        # Never replace the user's query with an empty or non-text value.
        if not isinstance(optimized_query, str) or not optimized_query.strip():
            raise ValueError("LLM response has no usable 'optimized_query'")

        # A missing or null "excluded_cards" means nothing to exclude.
        excluded_cards = result.get("excluded_cards") or []
    except Exception as e:
        # Best-effort node: fall back to the unrefined query.
        print("LLM query refinement failed:", e)
        state["excluded_cards"] = []
        state["multi_queries"] = []
        return state

    print("Optimized:", optimized_query)
    print("Excluded Cards:", excluded_cards)

    state["query"] = optimized_query
    state["excluded_cards"] = excluded_cards

    # ---- Step 2: expand the optimized query into sub-queries ---------------
    # Handled separately so that a failure here does not discard the
    # exclusions obtained in step 1.
    try:
        multi_query_prompt = f"""
The following is a detailed credit card search query:
"{optimized_query}"
Generate 3 distinct subqueries that **collectively cover all the important features** from the original query.
Each subquery should emphasize a **different combination** of the features (e.g., lounge access, travel insurance, low foreign transaction fees, hotel discounts, etc.).
Keep the same format: "Category: ... | ...". Make sure all features from the original query are represented across the 3 queries.
Output only the subqueries, one per line. Do not include any explanations, numbering, or formatting — just plain queries separated by newline characters.
"""

        multi_query_messages = [
            SystemMessage(content="You are an AI assistant that generates multiple search queries based on a primary query."),
            HumanMessage(content=multi_query_prompt),
        ]

        response2_obj = await llm1.ainvoke(
            multi_query_messages,
            config={"max_tokens": 256, "temperature": 0.7},
        )

        # One sub-query per non-blank output line.
        queries = [q.strip() for q in response2_obj.content.strip().split('\n') if q.strip()]
    except Exception as e:
        print("LLM multi-query generation failed:", e)
        state["multi_queries"] = []
    else:
        state["multi_queries"] = queries
        print("Generated Multi-Queries:", queries)

    return state