visaverse-chatbot / faq_routes.py
shakauthossain's picture
Added Rate Limit and Other things
0c0d03a
#Basic Packages
import io
import pandas as pd
import uuid
import traceback
from collections import defaultdict
import time
import os
import json
#API Packages
from fastapi import APIRouter, UploadFile, File, HTTPException, Body, Path, Request
#FAQ CSV Validator Package
from pydantic import BaseModel
#Calling Functions from other py files
from faq_services import ask_openai, load_faqs, add_faq_to_csv, faq_path, style_response, style_response_as_html
from ircc_updater import manual_ircc_update_with_result, search_ircc_pages, fetch_specific_ircc_page, clean_old_ircc_data
# Router on which every endpoint in this module registers itself via @router.<method>(...).
router = APIRouter()
# ---------------------- Rate Limiting ----------------------
RATE_LIMIT = 10   # max questions allowed per client within one window
RATE_WINDOW = 60  # length of the window, in seconds

# Maps client IP -> time.monotonic() stamps of that client's accepted requests,
# oldest first. Kept in process memory only.
request_log: dict[str, list[float]] = defaultdict(list)


def check_rate_limit(ip: str):
    """Allow up to RATE_LIMIT requests per RATE_WINDOW seconds per IP.

    Records the current request for ``ip`` when it is within quota.

    Args:
        ip: Key identifying the caller (the client host string).

    Raises:
        HTTPException: 429 when ``ip`` already has RATE_LIMIT requests inside
            the current window. The response carries a ``Retry-After`` header
            with the number of seconds until the oldest request expires.
    """
    # A monotonic clock is immune to wall-clock adjustments (NTP, manual
    # changes) that could otherwise expire or freeze the window incorrectly.
    now = time.monotonic()

    # Keep only timestamps within the current window.
    recent = [stamp for stamp in request_log[ip] if now - stamp < RATE_WINDOW]
    request_log[ip] = recent

    if len(recent) >= RATE_LIMIT:
        # recent[0] is the oldest surviving stamp; a slot frees up once it
        # leaves the window. Round up so clients never retry too early.
        retry_after = int(RATE_WINDOW - (now - recent[0])) + 1
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Maximum {RATE_LIMIT} questions per minute. Please wait and try again.",
            headers={"Retry-After": str(retry_after)},
        )

    recent.append(now)
# Data validation classes
class QuestionRequest(BaseModel):
    """Request body that carries a single free-text query."""

    # Text sent by the client; the handlers in this module strip surrounding
    # whitespace from it before use.
    query: str
class FAQItem(BaseModel):
    """Request body describing one FAQ entry as a question/answer pair."""

    # Compared against / stored in the CSV "prompt" column by the handlers here.
    question: str
    # Compared against / stored in the CSV "response" column by the handlers here.
    answer: str
@router.post("/ask")
async def ask_faq(request: QuestionRequest, req: Request):
    """Answer a user question and return the styled answer.

    The caller is rate limited first, keyed by client host (or "unknown" when
    the client address is not available); check_rate_limit raises HTTP 429
    when the caller is over quota.

    Returns:
        ``{"answer": <styled answer>}``.

    Raises:
        HTTPException: 500 with the exception text as detail when producing
            the answer fails.
    """
    if req.client:
        caller_ip = req.client.host
    else:
        caller_ip = "unknown"
    check_rate_limit(caller_ip)

    question = request.query.strip()
    try:
        # NOTE(review): ask_openai is invoked synchronously inside this
        # coroutine; if it blocks on network I/O the event loop stalls while
        # it runs — confirm and consider off-loading to a worker thread.
        styled = style_response(ask_openai(question))  # already-final HTML
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"answer": styled}
# Add Single FAQ API
@router.post("/add_faq")
async def add_faq(faq: FAQItem):
    """Append one FAQ to the CSV at ``faq_path`` and reload the FAQ data.

    Args:
        faq: Question/answer pair to store (written to the "prompt" and
            "response" columns with a freshly generated UUID as "id").

    Returns:
        ``{"message": "FAQ added successfully."}``.

    Raises:
        HTTPException: 400 when an identical question/answer row already
            exists; 500 with the exception text for any other failure.
    """
    # NOTE(review): this rebinds a module-level `db` in *this* file; confirm
    # that whatever answers queries actually observes the reload.
    global db
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")

        already_present = (df["prompt"] == faq.question) & (df["response"] == faq.answer)
        if already_present.any():
            raise HTTPException(status_code=400, detail="FAQ already exists.")

        new_row = pd.DataFrame(
            [{"id": str(uuid.uuid4()), "prompt": faq.question, "response": faq.answer}]
        )
        pd.concat([df, new_row], ignore_index=True).to_csv(
            faq_path, index=False, encoding="utf-8"
        )

        db = load_faqs()
        return {"message": "FAQ added successfully."}
    except HTTPException:
        # HTTPException is itself an Exception: without this clause the 400
        # above would be caught below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Upload CSV API
@router.post("/upload_faqs_csv")
async def upload_faqs_csv(file: UploadFile = File(...)):
    """Bulk-import FAQs from an uploaded CSV with 'question' and 'answer' columns.

    Every row whose question and answer are both present and non-blank is
    passed to add_faq_to_csv; afterwards the FAQ data is reloaded.

    Args:
        file: Uploaded file; its name must end in ".csv" (case-insensitive).

    Returns:
        A dict with "status" ("success" or "error") and "message"; error
        results additionally carry an "error" string. Failures are reported
        in the body rather than through an HTTP error status.
    """
    # The filename can be missing on some uploads; treat that as "not a CSV"
    # instead of crashing with AttributeError outside the try block.
    filename = file.filename or ""
    if not filename.lower().endswith(".csv"):
        return {
            "status": "error",
            "message": "Invalid file type",
            "error": "Only CSV files are supported."
        }

    global db
    try:
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))

        if "question" not in df.columns or "answer" not in df.columns:
            return {
                "status": "error",
                "message": "Invalid CSV structure",
                "error": "CSV must contain 'question' and 'answer' columns."
            }

        # Empty cells are parsed as NaN, and str(NaN) is the truthy text
        # "nan" — drop such rows so they are not stored as bogus FAQs.
        complete_rows = df.dropna(subset=["question", "answer"])

        for raw_question, raw_answer in zip(complete_rows["question"], complete_rows["answer"]):
            question = str(raw_question).strip()
            answer = str(raw_answer).strip()
            if question and answer:
                add_faq_to_csv(question, answer)

        db = load_faqs()
        return {
            "status": "success",
            "message": "FAQs uploaded and added successfully."
        }
    except Exception as e:
        traceback.print_exc()
        return {
            "status": "error",
            "message": "Failed to process CSV",
            "error": str(e)
        }
# Delete Single FAQ API
@router.delete("/delete_faq")
async def delete_faq(faq: FAQItem = Body(...)):
    """Delete every CSV row whose prompt and response both match ``faq`` exactly.

    Args:
        faq: Question/answer pair identifying the row(s) to remove.

    Returns:
        ``{"message": "FAQ deleted successfully."}``.

    Raises:
        HTTPException: 404 when no row matches; 500 with the exception text
            for any other failure.
    """
    global db
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")

        matches = (df["prompt"] == faq.question) & (df["response"] == faq.answer)
        if not matches.any():
            raise HTTPException(status_code=404, detail="FAQ not found.")

        df[~matches].to_csv(faq_path, index=False, encoding="utf-8")

        db = load_faqs()
        return {"message": "FAQ deleted successfully."}
    except HTTPException:
        # Without this clause the 404 above is caught by the generic handler
        # below and surfaces to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.delete("/deleted/{faq_id}")
async def delete_faq_by_id(faq_id: str = Path(...)):
    """Delete the CSV row(s) whose "id" column equals ``faq_id``.

    Args:
        faq_id: Identifier taken from the URL path.

    Returns:
        ``{"message": "FAQ with ID <faq_id> deleted successfully."}``.

    Raises:
        HTTPException: 404 when no row carries that id; 500 when the CSV has
            no "id" column or any other failure occurs.
    """
    global db
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")
        if "id" not in df.columns:
            raise HTTPException(status_code=500, detail="CSV does not contain 'id' column.")

        # Compare as text: faq_id is always a str, while pandas may have
        # parsed the column as numbers, in which case nothing would ever match.
        keep = df["id"].astype(str) != faq_id
        if keep.all():
            raise HTTPException(status_code=404, detail="FAQ with given ID not found.")

        df[keep].to_csv(faq_path, index=False, encoding="utf-8")

        db = load_faqs()
        return {"message": f"FAQ with ID {faq_id} deleted successfully."}
    except HTTPException:
        # Preserve the deliberate status codes/details raised above instead
        # of letting the generic handler re-wrap them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Delete All FAQs API
@router.delete("/delete/destroyall")
async def delete_all_faqs():
    """Replace the FAQ CSV with a header-only file and reload the FAQ data.

    Returns:
        ``{"message": "All FAQs deleted successfully."}``.

    Raises:
        HTTPException: 500 with the exception text when writing or reloading fails.
    """
    global db
    try:
        # An empty frame with the three expected columns writes just the header row.
        empty_table = pd.DataFrame(columns=["id", "prompt", "response"])
        empty_table.to_csv(faq_path, index=False, encoding="utf-8")
        db = load_faqs()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"message": "All FAQs deleted successfully."}
# Show All FAQs API
@router.get("/get_faqs")
async def get_faqs():
    """Return every row of the FAQ CSV as a list of dicts.

    All values are converted to strings, and the "prompt"/"response" columns
    are exposed as "question"/"answer".

    Raises:
        HTTPException: 404 when the CSV file is missing; 500 for parsing,
            encoding, or any other error (detail names the error category).
    """
    try:
        table = pd.read_csv(faq_path, encoding="utf-8").astype(str)
        exposed = table.rename(columns={"prompt": "question", "response": "answer"})
        return exposed.to_dict(orient="records")
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="FAQ CSV file not found.")
    except pd.errors.ParserError as e:
        raise HTTPException(status_code=500, detail=f"CSV Parsing Error: {e!s}")
    except UnicodeDecodeError as e:
        raise HTTPException(status_code=500, detail=f"Encoding Error: {e!s}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Unexpected Error: {e!s}")
# Retrain DB
@router.post("/retrain")
async def retrain_db():
    """Reload the FAQ data by calling load_faqs().

    Returns:
        ``{"message": "Chatbot retrained successfully."}``.

    Raises:
        HTTPException: 500 with the exception text when reloading fails.
    """
    # NOTE(review): the result is bound to a module-level `db` in this file;
    # confirm that the code answering queries actually reads it.
    global db
    try:
        db = load_faqs()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"message": "Chatbot retrained successfully."}
@router.get("/update_ircc_faqs")
async def update_ircc_faqs():
    """Trigger a manual IRCC update and report what it returned.

    Returns:
        A dict with "message", "added_count" and "entries". When the updater
        returns nothing (None or an empty result) the count is 0 and the
        entries list is empty.

    Raises:
        HTTPException: 500 with the exception text when the update fails.
    """
    try:
        added = manual_ircc_update_with_result()
        # Fall back to an empty list for None / empty results.
        entries = added if added else []
        return {
            "message": "IRCC FAQs updated manually.",
            "added_count": len(entries),
            "entries": entries
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/search_ircc")
async def search_ircc(request: QuestionRequest):
    """
    Look up IRCC pages for the stripped query via search_ircc_pages and
    return the query together with the result count and the results.

    Raises HTTP 500 (exception text as detail) when the search fails.
    """
    try:
        search_text = request.query.strip()
        pages = search_ircc_pages(search_text)
        response = {
            "query": search_text,
            "results_count": len(pages),
            "pages": pages
        }
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
def _is_ircc_url(url: str) -> bool:
    """Return True only for https URLs whose host is exactly www.canada.ca."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        return parts.scheme == "https" and parts.hostname == "www.canada.ca"
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. broken IPv6 literals).
        return False


@router.post("/fetch_ircc_page")
async def fetch_ircc_page(request: dict = Body(...)):
    """
    Fetch content from a specific IRCC page URL.

    The JSON body must contain a "url" key pointing at https://www.canada.ca.
    Returns whatever fetch_specific_ircc_page yields for that URL.

    Raises HTTP 400 for a missing, non-string or non-IRCC URL, 404 when the
    fetch returns nothing, and 500 (exception text as detail) otherwise.
    """
    try:
        raw_url = request.get("url", "")
        # A non-string value (number, null, list...) is a client error, not a crash.
        url = raw_url.strip() if isinstance(raw_url, str) else ""

        # Validate the parsed host rather than a string prefix: a prefix check
        # also accepts "https://www.canada.ca.evil.com" or
        # "https://www.canada.ca@evil.com", letting callers make the server
        # fetch arbitrary hosts.
        if not _is_ircc_url(url):
            raise HTTPException(status_code=400, detail="Invalid IRCC URL")

        page_data = fetch_specific_ircc_page(url)
        if not page_data:
            raise HTTPException(status_code=404, detail="Failed to fetch page")
        return page_data
    except HTTPException:
        # Keep the 400/404 raised above; the generic handler below would
        # otherwise turn them into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/rebuild_ircc_data")
async def rebuild_ircc_data():
    """
    Force an IRCC data rebuild: clear the updater's stored content hash,
    then run the manual IRCC update.
    Intended for monthly use or when conflicting information is noticed.

    Raises HTTP 500 (exception text as detail) when the rebuild fails.
    """
    try:
        import ircc_updater

        # With no remembered hash, update_ircc_embeddings() is forced to rebuild.
        ircc_updater._last_content_hash = None

        added = manual_ircc_update_with_result()
        if added:
            pages_loaded = len(added)
        else:
            pages_loaded = 0

        return {
            "message": "IRCC data rebuilt successfully",
            "status": "success",
            "ircc_pages_loaded": pages_loaded,
            "note": "All old IRCC data removed, fresh data loaded"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))