Spaces:

Notionhive
/

visaverse-chatbot

Runtime error

App Files Files Community

notionhive-ai committed on Aug 6, 2025

Commit

535eabf

verified ·

1 Parent(s): 20919d9

Upload 4 files

Browse files

Files changed (4) hide show

chatbot_prompt.py +1 -25
faq_routes.py +1 -49
faq_services.py +79 -5
ircc_updater.py +4 -1

chatbot_prompt.py CHANGED Viewed

@@ -1,26 +1,2 @@
 def generate_prompt(context: str, query: str) -> str:
-    return f"""You are Noah, an intelligent and friendly virtual assistant by VisaVerse. Your primary responsibility is to assist users by answering their questions accurately and concisely based on the official VisaVerse FAQs and website: https://visaverse.ca.
-When a user greets you, kindly introduce yourself as Noah, VisaVerse’s AI assistant. Avoid any unnecessary greetings or random greetings. Do not greet everytime.
-For all visa, immigration, or VisaVerse-related questions, first try to find answers from the provided FAQs.
-If the answer to a VisaVerse-related question is not found in the FAQs and the question is critical or detailed, you may perform a web search limited to content related to https://visaverse.ca to find accurate and relevant information. If you still cannot answer, politely suggest visiting the website for more information.
-For basic or general non-technical questions (such as “What is a visa?”, “What are the types of study permits?”, “How long does a tourist visa last in general?”, and many more), you are allowed to use web search to provide a brief, factual answer — even if it's not in the VisaVerse FAQ.
-Do not answer personal, medical, financial, or legal advice-based questions from the internet. In those cases, refer the user to VisaVerse's site.
-Avoid using technical terms or programming-related jargon unless directly relevant.
-Always ensure responses are clear, factual, and aligned with VisaVerse’s professional tone.
-When an FAQ-based answer is simply "yes" or "no", rephrase it naturally into a full sentence (e.g., instead of "Yes", say "Yes, you can apply for a visa extension").
-Never fabricate information. If something isn’t covered, politely say it isn’t currently available and suggest visiting the VisaVerse website.
-Use the following context to answer the user's question:
-{context}
-User Question: {query}
-Answer:"""


1	def generate_prompt(context: str, query: str) -> str:
2	+ return f"""Context:\n{context}\n\nUser Question: {query}\nAnswer:"""

faq_routes.py CHANGED Viewed

@@ -21,11 +21,6 @@ from ircc_updater import manual_ircc_update_with_result
 router = APIRouter()
-user_question_count = defaultdict(int)
-QUESTION_LIMIT = 3
-WHATSAPP_LINK = "https://wa.me/+8801712922233"
-GREETING_KEYWORDS = {"hi", "hello", "hey", "good morning", "good evening", "good afternoon", "greetings"}
-QUESTION_LOG_FILE = "question_limit_log.json"
 # Data validation classes
 class QuestionRequest(BaseModel):
@@ -35,53 +30,10 @@ class FAQItem(BaseModel):
     question: str
     answer: str
-def is_greeting(text: str) -> bool:
-    lower = text.lower().strip()
-    return any(greet in lower for greet in GREETING_KEYWORDS) or len(lower) <= 12
-def load_question_log():
-    if not os.path.exists(QUESTION_LOG_FILE):
-        return {}
-    with open(QUESTION_LOG_FILE, "r") as f:
-        return json.load(f)
-def save_question_log(log_data):
-    with open(QUESTION_LOG_FILE, "w") as f:
-        json.dump(log_data, f)
-user_question_count = load_question_log()
-def get_client_ip(request: Request) -> str:
-    x_forwarded_for = request.headers.get("x-forwarded-for")
-    if x_forwarded_for:
-        return x_forwarded_for.split(",")[0].strip()
-    elif request.client and request.client.host:
-        return request.client.host
-    # Raise error if no IP found
-    raise HTTPException(status_code=400, detail="Unable to determine client IP address.")
 @router.post("/ask")
-async def ask_faq(request: QuestionRequest, http_request: Request):
-    ip = get_client_ip(http_request)
     query = request.query.strip()
-    count = user_question_count.get(ip, 0)
-    if count >= QUESTION_LIMIT:
-        return {
-            "answer": f"For more information. Please contact us on WhatsApp: {WHATSAPP_LINK}"
-        }
-    if not is_greeting(query):
-        user_question_count[ip] = count + 1
-        save_question_log(user_question_count)
-        if user_question_count[ip] >= QUESTION_LIMIT:
-            return {
-                "answer": f"For more information. Please contact us on WhatsApp: {WHATSAPP_LINK}"
-            }
     results = db.similarity_search(query, k=3)
     context = "\n\n".join([doc.page_content for doc in results])
     prompt = generate_prompt(context, query)

 router = APIRouter()
 # Data validation classes
 class QuestionRequest(BaseModel):
     question: str
     answer: str
 @router.post("/ask")
+async def ask_faq(request: QuestionRequest):
     query = request.query.strip()
     results = db.similarity_search(query, k=3)
     context = "\n\n".join([doc.page_content for doc in results])
     prompt = generate_prompt(context, query)

faq_services.py CHANGED Viewed

@@ -11,6 +11,9 @@ from langchain.chat_models import ChatOpenAI
 from langchain.schema import HumanMessage
 from langchain.docstore.document import Document
 from langchain_community.document_loaders import CSVLoader
 # ---------------------- Environment Setup ----------------------
@@ -20,12 +23,45 @@ os.environ["HF_HOME"] = "/tmp/hf_cache"  # Optional cleanup
 # ---------------------- File & Model Config ----------------------
 faq_path = "faqs.csv"
-embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
 # Zilliz (Milvus) Cloud Config
 milvus_uri = os.getenv("ZILLIZ_URI")
 milvus_token = os.getenv("ZILLIZ_TOKEN")
-collection_name = os.getenv("ZILLIZ_COLLECTION", "visaverse_faqs")
 # ---------------------- Load FAQ Vector DB ----------------------
@@ -33,6 +69,14 @@ def load_faqs():
     if not os.path.exists(faq_path):
         pd.DataFrame(columns=["id", "prompt", "response"]).to_csv(faq_path, index=False, encoding="utf-8")
     loader = CSVLoader(faq_path, encoding="utf-8")
     docs = loader.load()
@@ -49,17 +93,46 @@ def load_faqs():
         collection_name=collection_name,
     )
 db = load_faqs()
 # ---------------------- LLM Wrapper ----------------------
 def ask_openai(prompt: str) -> str:
     chat = ChatOpenAI(
-        model_name="gpt-4o",  # Or use "gpt-3.5-turbo"
         temperature=0.5,
         openai_api_key=os.getenv("OPENAI_API_KEY")
     )
-    return chat([HumanMessage(content=prompt)]).content.strip()
 # ---------------------- Append New FAQ to CSV ----------------------
@@ -69,4 +142,5 @@ def add_faq_to_csv(question: str, answer: str):
         new_row = pd.DataFrame([{"id": str(uuid.uuid4()), "prompt": question, "response": answer}])
         df = pd.concat([df, new_row], ignore_index=True)
         df.to_csv(faq_path, index=False, encoding="utf-8")

 from langchain.schema import HumanMessage
 from langchain.docstore.document import Document
 from langchain_community.document_loaders import CSVLoader
+from langchain.schema import SystemMessage, HumanMessage
+from pymilvus import connections, utility, Collection
+from pymilvus.orm.schema import FieldSchema
 # ---------------------- Environment Setup ----------------------
 # ---------------------- File & Model Config ----------------------
 faq_path = "faqs.csv"
+embedding_model = OpenAIEmbeddings(
+    model="text-embedding-3-small",
+    openai_api_key=os.getenv("OPENAI_API_KEY")
+)
 # Zilliz (Milvus) Cloud Config
 milvus_uri = os.getenv("ZILLIZ_URI")
 milvus_token = os.getenv("ZILLIZ_TOKEN")
+collection_name = os.getenv("ZILLIZ_COLLECTION", "visaverse_faqs3")
+connections.connect(
+    alias="default",
+    uri=os.getenv("ZILLIZ_URI"),
+    token=os.getenv("ZILLIZ_TOKEN")
+)
+# Check that an existing Milvus/Zilliz collection stores vectors of the expected
+# dimension, and drop the collection when it does not.
+#
+# Args:
+#   expected_dim: dimension the collection's FLOAT_VECTOR field must have.
+#   collection_name: name of the collection to inspect.
+#   uri, token: values forwarded to `connections.connect`.
+#
+# Returns None on every path; outcomes are only reported through print().
+# Destructive: a dimension mismatch, or the absence of any FLOAT_VECTOR field
+# carrying a "dim" param, drops the whole collection.
+def ensure_collection_matches_schema(expected_dim: int, collection_name: str, uri: str, token: str):
+    try:
+        # NOTE(review): the module already calls connections.connect(alias="default", ...)
+        # at import time; this second connect looks redundant — confirm.
+        connections.connect(uri=uri, token=token)
+        if utility.has_collection(collection_name):
+            schema = Collection(collection_name).schema
+            # Only the first FLOAT_VECTOR field that exposes a "dim" param is
+            # examined: both branches below return. Vector fields without a
+            # "dim" param are skipped and the loop continues.
+            for field in schema.fields:
+                if field.dtype.name == "FLOAT_VECTOR":
+                    if field.params and "dim" in field.params:
+                        actual_dim = int(field.params["dim"])
+                        if actual_dim != expected_dim:
+                            print(f"Collection '{collection_name}' has dim {actual_dim}, expected {expected_dim}. Dropping it.")
+                            utility.drop_collection(collection_name)
+                            return
+                        else:
+                            print(f"Collection '{collection_name}' has correct dimension: {expected_dim}.")
+                            return
+            # Reached only when the loop found no FLOAT_VECTOR field with a "dim" param.
+            print(f"Could not find vector field in collection '{collection_name}'. Dropping for safety.")
+            utility.drop_collection(collection_name)
+        else:
+            print(f"Collection '{collection_name}' does not exist. It will be created.")
+    except Exception as e:
+        # Best-effort: any failure (connect, lookup, drop) is printed and
+        # swallowed, so the caller continues without knowing validation failed.
+        print(f"Failed to validate or drop collection: {e}")
 # ---------------------- Load FAQ Vector DB ----------------------
     if not os.path.exists(faq_path):
         pd.DataFrame(columns=["id", "prompt", "response"]).to_csv(faq_path, index=False, encoding="utf-8")
+    # Check collection schema
+    ensure_collection_matches_schema(
+        expected_dim=1536,
+        collection_name=collection_name,
+        uri=milvus_uri,
+        token=milvus_token
+    )
     loader = CSVLoader(faq_path, encoding="utf-8")
     docs = loader.load()
         collection_name=collection_name,
     )
 db = load_faqs()
 # ---------------------- LLM Wrapper ----------------------
+# Send `prompt` to the chat model together with the fixed "Noah" system message
+# and return the reply text with surrounding whitespace stripped.
+#
+# Args:
+#   prompt: text sent as the HumanMessage content.
+# Returns:
+#   The `.content` of the model's reply, after `.strip()`.
 def ask_openai(prompt: str) -> str:
+    # NOTE(review): a new ChatOpenAI client is constructed on every call; the API
+    # key is read from the OPENAI_API_KEY environment variable each time.
     chat = ChatOpenAI(
+        model_name="gpt-4o",
         temperature=0.5,
         openai_api_key=os.getenv("OPENAI_API_KEY")
     )
+    # Persona and answering rules are sent as a system message, separate from
+    # the prompt text passed in by the caller. The string below is runtime text
+    # sent to the model verbatim.
+    system_msg = SystemMessage(content="""
+You are Noah, an intelligent and friendly virtual assistant by VisaVerse. Your primary responsibility is to assist users by answering their questions accurately and concisely based on the official VisaVerse FAQs and the website: https://visaverse.ca.
+Behavior rules:
+- When a user greets you, kindly introduce yourself as Noah, VisaVerse’s AI assistant.
+- Do NOT greet every time.
+- Do NOT greet more than once in a session.
+- Avoid unnecessary greetings or restating your name repeatedly.
+Answering rules:
+- For visa, immigration, or VisaVerse-related questions, first try to answer using the provided FAQ context.
+- If the FAQ does not contain the answer, and the question is detailed or important, you may use a web search (only limited to content from https://visaverse.ca).
+- For basic/general visa-related questions, you may answer briefly even if not in the FAQ.
+- If a question is unregistered in the FAQ database or you are unable to answer it confidently, say: "This query is not currently addressed in VisaVerse and IRCC database. For further assistance, please contact our support team at team@visaverse.ca."
+Additional rules:
+- Never answer personal, legal, financial, or medical questions — always refer users to the official VisaVerse site.
+- Never guess or fabricate information. If unsure, suggest visiting the VisaVerse website or contacting support.
+- Always use clear, neutral, and professional tone.
+- Avoid technical or programming language unless explicitly relevant.
+- If the answer is just “yes” or “no”, rewrite it as a full, natural sentence.
+""")
+    # Message order: system message first, then the caller's prompt.
+    return chat([
+        system_msg,
+        HumanMessage(content=prompt)
+    ]).content.strip()
 # ---------------------- Append New FAQ to CSV ----------------------
         new_row = pd.DataFrame([{"id": str(uuid.uuid4()), "prompt": question, "response": answer}])
         df = pd.concat([df, new_row], ignore_index=True)
         df.to_csv(faq_path, index=False, encoding="utf-8")

ircc_updater.py CHANGED Viewed

@@ -8,7 +8,10 @@ from apscheduler.schedulers.background import BackgroundScheduler
 import os
 # Config
-embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
 ircc_urls = [
     "https://www.canada.ca/en/immigration-refugees-citizenship.html",

 import os
 # Config
+embedding_model = OpenAIEmbeddings(
+    model="text-embedding-3-small",
+    openai_api_key=os.getenv("OPENAI_API_KEY")
+)
 ircc_urls = [
     "https://www.canada.ca/en/immigration-refugees-citizenship.html",