notionhive-ai committed on
Commit
535eabf
·
verified ·
1 Parent(s): 20919d9

Upload 4 files

Browse files
Files changed (4) hide show
  1. chatbot_prompt.py +1 -25
  2. faq_routes.py +1 -49
  3. faq_services.py +79 -5
  4. ircc_updater.py +4 -1
chatbot_prompt.py CHANGED
@@ -1,26 +1,2 @@
1
  def generate_prompt(context: str, query: str) -> str:
2
- return f"""You are Noah, an intelligent and friendly virtual assistant by VisaVerse. Your primary responsibility is to assist users by answering their questions accurately and concisely based on the official VisaVerse FAQs and website: https://visaverse.ca.
3
-
4
- When a user greets you, kindly introduce yourself as Noah, VisaVerse’s AI assistant. Avoid any unnecessary greetings or random greetings. Do not greet everytime.
5
-
6
- For all visa, immigration, or VisaVerse-related questions, first try to find answers from the provided FAQs.
7
-
8
- If the answer to a VisaVerse-related question is not found in the FAQs and the question is critical or detailed, you may perform a web search limited to content related to https://visaverse.ca to find accurate and relevant information. If you still cannot answer, politely suggest visiting the website for more information.
9
-
10
- For basic or general non-technical questions (such as “What is a visa?”, “What are the types of study permits?”, “How long does a tourist visa last in general?”, and many more), you are allowed to use web search to provide a brief, factual answer — even if it's not in the VisaVerse FAQ.
11
-
12
- Do not answer personal, medical, financial, or legal advice-based questions from the internet. In those cases, refer the user to VisaVerse's site.
13
-
14
- Avoid using technical terms or programming-related jargon unless directly relevant.
15
-
16
- Always ensure responses are clear, factual, and aligned with VisaVerse’s professional tone.
17
-
18
- When an FAQ-based answer is simply "yes" or "no", rephrase it naturally into a full sentence (e.g., instead of "Yes", say "Yes, you can apply for a visa extension").
19
-
20
- Never fabricate information. If something isn’t covered, politely say it isn’t currently available and suggest visiting the VisaVerse website.
21
- Use the following context to answer the user's question:
22
-
23
- {context}
24
-
25
- User Question: {query}
26
- Answer:"""
 
1
  def generate_prompt(context: str, query: str) -> str:
2
+ return f"""Context:\n{context}\n\nUser Question: {query}\nAnswer:"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faq_routes.py CHANGED
@@ -21,11 +21,6 @@ from ircc_updater import manual_ircc_update_with_result
21
 
22
  router = APIRouter()
23
 
24
- user_question_count = defaultdict(int)
25
- QUESTION_LIMIT = 3
26
- WHATSAPP_LINK = "https://wa.me/+8801712922233"
27
- GREETING_KEYWORDS = {"hi", "hello", "hey", "good morning", "good evening", "good afternoon", "greetings"}
28
- QUESTION_LOG_FILE = "question_limit_log.json"
29
 
30
  # Data validation classes
31
  class QuestionRequest(BaseModel):
@@ -35,53 +30,10 @@ class FAQItem(BaseModel):
35
  question: str
36
  answer: str
37
 
38
- def is_greeting(text: str) -> bool:
39
- lower = text.lower().strip()
40
- return any(greet in lower for greet in GREETING_KEYWORDS) or len(lower) <= 12
41
-
42
- def load_question_log():
43
- if not os.path.exists(QUESTION_LOG_FILE):
44
- return {}
45
- with open(QUESTION_LOG_FILE, "r") as f:
46
- return json.load(f)
47
-
48
- def save_question_log(log_data):
49
- with open(QUESTION_LOG_FILE, "w") as f:
50
- json.dump(log_data, f)
51
-
52
- user_question_count = load_question_log()
53
-
54
- def get_client_ip(request: Request) -> str:
55
- x_forwarded_for = request.headers.get("x-forwarded-for")
56
- if x_forwarded_for:
57
- return x_forwarded_for.split(",")[0].strip()
58
- elif request.client and request.client.host:
59
- return request.client.host
60
-
61
- # Raise error if no IP found
62
- raise HTTPException(status_code=400, detail="Unable to determine client IP address.")
63
-
64
  @router.post("/ask")
65
- async def ask_faq(request: QuestionRequest, http_request: Request):
66
- ip = get_client_ip(http_request)
67
  query = request.query.strip()
68
 
69
- count = user_question_count.get(ip, 0)
70
-
71
- if count >= QUESTION_LIMIT:
72
- return {
73
- "answer": f"For more information. Please contact us on WhatsApp: {WHATSAPP_LINK}"
74
- }
75
-
76
- if not is_greeting(query):
77
- user_question_count[ip] = count + 1
78
- save_question_log(user_question_count)
79
-
80
- if user_question_count[ip] >= QUESTION_LIMIT:
81
- return {
82
- "answer": f"For more information. Please contact us on WhatsApp: {WHATSAPP_LINK}"
83
- }
84
-
85
  results = db.similarity_search(query, k=3)
86
  context = "\n\n".join([doc.page_content for doc in results])
87
  prompt = generate_prompt(context, query)
 
21
 
22
  router = APIRouter()
23
 
 
 
 
 
 
24
 
25
  # Data validation classes
26
  class QuestionRequest(BaseModel):
 
30
  question: str
31
  answer: str
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  @router.post("/ask")
34
+ async def ask_faq(request: QuestionRequest):
 
35
  query = request.query.strip()
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  results = db.similarity_search(query, k=3)
38
  context = "\n\n".join([doc.page_content for doc in results])
39
  prompt = generate_prompt(context, query)
faq_services.py CHANGED
@@ -11,6 +11,9 @@ from langchain.chat_models import ChatOpenAI
11
  from langchain.schema import HumanMessage
12
  from langchain.docstore.document import Document
13
  from langchain_community.document_loaders import CSVLoader
 
 
 
14
 
15
  # ---------------------- Environment Setup ----------------------
16
 
@@ -20,12 +23,45 @@ os.environ["HF_HOME"] = "/tmp/hf_cache" # Optional cleanup
20
  # ---------------------- File & Model Config ----------------------
21
 
22
  faq_path = "faqs.csv"
23
- embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
24
 
25
  # Zilliz (Milvus) Cloud Config
26
  milvus_uri = os.getenv("ZILLIZ_URI")
27
  milvus_token = os.getenv("ZILLIZ_TOKEN")
28
- collection_name = os.getenv("ZILLIZ_COLLECTION", "visaverse_faqs")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # ---------------------- Load FAQ Vector DB ----------------------
31
 
@@ -33,6 +69,14 @@ def load_faqs():
33
  if not os.path.exists(faq_path):
34
  pd.DataFrame(columns=["id", "prompt", "response"]).to_csv(faq_path, index=False, encoding="utf-8")
35
 
 
 
 
 
 
 
 
 
36
  loader = CSVLoader(faq_path, encoding="utf-8")
37
  docs = loader.load()
38
 
@@ -49,17 +93,46 @@ def load_faqs():
49
  collection_name=collection_name,
50
  )
51
 
 
52
  db = load_faqs()
53
 
54
  # ---------------------- LLM Wrapper ----------------------
55
 
56
  def ask_openai(prompt: str) -> str:
57
  chat = ChatOpenAI(
58
- model_name="gpt-4o", # Or use "gpt-3.5-turbo"
59
  temperature=0.5,
60
  openai_api_key=os.getenv("OPENAI_API_KEY")
61
  )
62
- return chat([HumanMessage(content=prompt)]).content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # ---------------------- Append New FAQ to CSV ----------------------
65
 
@@ -69,4 +142,5 @@ def add_faq_to_csv(question: str, answer: str):
69
  new_row = pd.DataFrame([{"id": str(uuid.uuid4()), "prompt": question, "response": answer}])
70
  df = pd.concat([df, new_row], ignore_index=True)
71
  df.to_csv(faq_path, index=False, encoding="utf-8")
72
-
 
 
11
  from langchain.schema import HumanMessage
12
  from langchain.docstore.document import Document
13
  from langchain_community.document_loaders import CSVLoader
14
+ from langchain.schema import SystemMessage, HumanMessage
15
+ from pymilvus import connections, utility, Collection
16
+ from pymilvus.orm.schema import FieldSchema
17
 
18
  # ---------------------- Environment Setup ----------------------
19
 
 
23
  # ---------------------- File & Model Config ----------------------
24
 
25
  faq_path = "faqs.csv"
26
+ embedding_model = OpenAIEmbeddings(
27
+ model="text-embedding-3-small",
28
+ openai_api_key=os.getenv("OPENAI_API_KEY")
29
+ )
30
 
31
  # Zilliz (Milvus) Cloud Config
32
  milvus_uri = os.getenv("ZILLIZ_URI")
33
  milvus_token = os.getenv("ZILLIZ_TOKEN")
34
+ collection_name = os.getenv("ZILLIZ_COLLECTION", "visaverse_faqs3")
35
+
36
+ connections.connect(
37
+ alias="default",
38
+ uri=os.getenv("ZILLIZ_URI"),
39
+ token=os.getenv("ZILLIZ_TOKEN")
40
+ )
41
+
42
+ def ensure_collection_matches_schema(expected_dim: int, collection_name: str, uri: str, token: str):
43
+ try:
44
+ connections.connect(uri=uri, token=token)
45
+
46
+ if utility.has_collection(collection_name):
47
+ schema = Collection(collection_name).schema
48
+ for field in schema.fields:
49
+ if field.dtype.name == "FLOAT_VECTOR":
50
+ if field.params and "dim" in field.params:
51
+ actual_dim = int(field.params["dim"])
52
+ if actual_dim != expected_dim:
53
+ print(f"Collection '{collection_name}' has dim {actual_dim}, expected {expected_dim}. Dropping it.")
54
+ utility.drop_collection(collection_name)
55
+ return
56
+ else:
57
+ print(f"Collection '{collection_name}' has correct dimension: {expected_dim}.")
58
+ return
59
+ print(f"Could not find vector field in collection '{collection_name}'. Dropping for safety.")
60
+ utility.drop_collection(collection_name)
61
+ else:
62
+ print(f"Collection '{collection_name}' does not exist. It will be created.")
63
+ except Exception as e:
64
+ print(f"Failed to validate or drop collection: {e}")
65
 
66
  # ---------------------- Load FAQ Vector DB ----------------------
67
 
 
69
  if not os.path.exists(faq_path):
70
  pd.DataFrame(columns=["id", "prompt", "response"]).to_csv(faq_path, index=False, encoding="utf-8")
71
 
72
+ # Check collection schema
73
+ ensure_collection_matches_schema(
74
+ expected_dim=1536,
75
+ collection_name=collection_name,
76
+ uri=milvus_uri,
77
+ token=milvus_token
78
+ )
79
+
80
  loader = CSVLoader(faq_path, encoding="utf-8")
81
  docs = loader.load()
82
 
 
93
  collection_name=collection_name,
94
  )
95
 
96
+
97
  db = load_faqs()
98
 
99
  # ---------------------- LLM Wrapper ----------------------
100
 
101
  def ask_openai(prompt: str) -> str:
102
  chat = ChatOpenAI(
103
+ model_name="gpt-4o",
104
  temperature=0.5,
105
  openai_api_key=os.getenv("OPENAI_API_KEY")
106
  )
107
+
108
+ system_msg = SystemMessage(content="""
109
+ You are Noah, an intelligent and friendly virtual assistant by VisaVerse. Your primary responsibility is to assist users by answering their questions accurately and concisely based on the official VisaVerse FAQs and the website: https://visaverse.ca.
110
+
111
+ Behavior rules:
112
+ - When a user greets you, kindly introduce yourself as Noah, VisaVerse’s AI assistant.
113
+ - Do NOT greet every time.
114
+ - Do NOT greet more than once in a session.
115
+ - Avoid unnecessary greetings or restating your name repeatedly.
116
+
117
+ Answering rules:
118
+ - For visa, immigration, or VisaVerse-related questions, first try to answer using the provided FAQ context.
119
+ - If the FAQ does not contain the answer, and the question is detailed or important, you may use a web search (only limited to content from https://visaverse.ca).
120
+ - For basic/general visa-related questions, you may answer briefly even if not in the FAQ.
121
+ - If a question is unregistered in the FAQ database or you are unable to answer it confidently, say: "This query is not currently addressed in VisaVerse and IRCC database. For further assistance, please contact our support team at team@visaverse.ca."
122
+
123
+ Additional rules:
124
+ - Never answer personal, legal, financial, or medical questions — always refer users to the official VisaVerse site.
125
+ - Never guess or fabricate information. If unsure, suggest visiting the VisaVerse website or contacting support.
126
+ - Always use clear, neutral, and professional tone.
127
+ - Avoid technical or programming language unless explicitly relevant.
128
+ - If the answer is just “yes” or “no”, rewrite it as a full, natural sentence.
129
+ """)
130
+
131
+ return chat([
132
+ system_msg,
133
+ HumanMessage(content=prompt)
134
+ ]).content.strip()
135
+
136
 
137
  # ---------------------- Append New FAQ to CSV ----------------------
138
 
 
142
  new_row = pd.DataFrame([{"id": str(uuid.uuid4()), "prompt": question, "response": answer}])
143
  df = pd.concat([df, new_row], ignore_index=True)
144
  df.to_csv(faq_path, index=False, encoding="utf-8")
145
+
146
+
ircc_updater.py CHANGED
@@ -8,7 +8,10 @@ from apscheduler.schedulers.background import BackgroundScheduler
8
  import os
9
 
10
  # Config
11
- embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
12
 
13
  ircc_urls = [
14
  "https://www.canada.ca/en/immigration-refugees-citizenship.html",
 
8
  import os
9
 
10
  # Config
11
+ embedding_model = OpenAIEmbeddings(
12
+ model="text-embedding-3-small",
13
+ openai_api_key=os.getenv("OPENAI_API_KEY")
14
+ )
15
 
16
  ircc_urls = [
17
  "https://www.canada.ca/en/immigration-refugees-citizenship.html",