"""FastAPI router endpoints for the FAQ chatbot: FAQ CRUD over a CSV store and IRCC page search/update utilities."""
| #Basic Packages | |
| import io | |
| import pandas as pd | |
| import uuid | |
| import traceback | |
| from collections import defaultdict | |
| import time | |
| import os | |
| import json | |
| #API Packages | |
| from fastapi import APIRouter, UploadFile, File, HTTPException, Body, Path, Request | |
| #FAQ CSV Validator Package | |
| from pydantic import BaseModel | |
| #Calling Functions from other py files | |
| from faq_services import ask_openai, load_faqs, add_faq_to_csv, faq_path, style_response, style_response_as_html | |
| from ircc_updater import manual_ircc_update_with_result, search_ircc_pages, fetch_specific_ircc_page, clean_old_ircc_data | |
router = APIRouter()

# ---------------------- Rate Limiting ----------------------
RATE_LIMIT = 10   # max questions allowed
RATE_WINDOW = 60  # per this many seconds

# Per-IP timestamps of recent requests; pruned on every check.
request_log: dict[str, list[float]] = defaultdict(list)


def check_rate_limit(ip: str) -> None:
    """Allow up to RATE_LIMIT requests per RATE_WINDOW seconds per IP.

    Args:
        ip: Client IP address used as the rate-limit bucket key.

    Raises:
        HTTPException: 429 when the caller has exhausted its quota for the
            current sliding window.
    """
    now = time.time()
    # Keep only timestamps still inside the sliding window.
    recent = [t for t in request_log[ip] if now - t < RATE_WINDOW]
    request_log[ip] = recent
    if len(recent) >= RATE_LIMIT:
        # Bug fix: the message previously hardcoded "per minute" even though
        # the actual window is the configurable RATE_WINDOW.
        raise HTTPException(
            status_code=429,
            detail=(
                f"Rate limit exceeded. Maximum {RATE_LIMIT} questions per "
                f"{RATE_WINDOW} seconds. Please wait and try again."
            ),
        )
    recent.append(now)
# Data validation classes
class QuestionRequest(BaseModel):
    """Request body for question-style endpoints (ask_faq, search_ircc)."""
    # The user's natural-language query; stripped before use by the handlers.
    query: str
class FAQItem(BaseModel):
    """Request body identifying one FAQ entry (question/answer pair)."""
    # Question text; compared against / stored in the CSV "prompt" column.
    question: str
    # Answer text; compared against / stored in the CSV "response" column.
    answer: str
async def ask_faq(request: QuestionRequest, req: Request):
    """Answer a user question via OpenAI, rate-limited per client IP.

    Args:
        request: Validated body carrying the user's query.
        req: Raw request, used only to read the client IP.

    Returns:
        dict: {"answer": <HTML string>}.

    Raises:
        HTTPException: 429 when rate-limited, 400 on a blank query,
            500 on any backend failure.
    """
    # Enforce rate limit per IP.
    client_ip = req.client.host if req.client else "unknown"
    check_rate_limit(client_ip)
    query = request.query.strip()
    if not query:
        # Robustness fix: reject blank questions instead of spending an
        # OpenAI call on them.
        raise HTTPException(status_code=400, detail="Query must not be empty.")
    try:
        raw = ask_openai(query)
        # NOTE(review): import also exposes style_response_as_html — confirm
        # style_response indeed returns final HTML as the comment claimed.
        html_answer = style_response(raw)  # already-final HTML
        return {"answer": html_answer}
    except HTTPException:
        raise  # don't re-wrap deliberate HTTP errors as 500s
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Add Single FAQ API
async def add_faq(faq: FAQItem):
    """Append one FAQ to the CSV store and reload the in-memory index.

    Args:
        faq: The question/answer pair to add.

    Returns:
        dict: Success message.

    Raises:
        HTTPException: 400 if the exact pair already exists, 500 otherwise.
    """
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")
        if ((df["prompt"] == faq.question) & (df["response"] == faq.answer)).any():
            raise HTTPException(status_code=400, detail="FAQ already exists.")
        new_df = pd.DataFrame(
            [{"id": str(uuid.uuid4()), "prompt": faq.question, "response": faq.answer}]
        )
        updated_df = pd.concat([df, new_df], ignore_index=True)
        updated_df.to_csv(faq_path, index=False, encoding="utf-8")
        global db
        db = load_faqs()
        return {"message": "FAQ added successfully."}
    except HTTPException:
        # Bug fix: the 400 "already exists" used to be caught by the broad
        # handler below and re-raised as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Upload CSV API
async def upload_faqs_csv(file: UploadFile = File(...)):
    """Bulk-import FAQs from an uploaded CSV with 'question'/'answer' columns.

    Args:
        file: Uploaded CSV file.

    Returns:
        dict: {"status": "success"|"error", "message": ..., ["error": ...]}.
        Errors are reported in the body rather than raised, matching the
        original endpoint's contract.
    """
    # Bug fix: file.filename may be None, and the extension check was
    # case-sensitive (rejecting e.g. "DATA.CSV").
    filename = file.filename or ""
    if not filename.lower().endswith(".csv"):
        return {
            "status": "error",
            "message": "Invalid file type",
            "error": "Only CSV files are supported."
        }
    try:
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))
        if "question" not in df.columns or "answer" not in df.columns:
            return {
                "status": "error",
                "message": "Invalid CSV structure",
                "error": "CSV must contain 'question' and 'answer' columns."
            }
        for _, row in df.iterrows():
            question = str(row["question"]).strip()
            answer = str(row["answer"]).strip()
            # Skip rows with a blank question or answer.
            if question and answer:
                add_faq_to_csv(question, answer)
        # Reload the in-memory index so new FAQs are served immediately.
        global db
        db = load_faqs()
        return {
            "status": "success",
            "message": "FAQs uploaded and added successfully."
        }
    except Exception as e:
        traceback.print_exc()
        return {
            "status": "error",
            "message": "Failed to process CSV",
            "error": str(e)
        }
# Delete Single FAQ API
async def delete_faq(faq: FAQItem = Body(...)):
    """Delete the FAQ whose prompt AND response both match, then reload.

    Args:
        faq: The exact question/answer pair to remove.

    Returns:
        dict: Success message.

    Raises:
        HTTPException: 404 if no matching row exists, 500 otherwise.
    """
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")
        filtered_df = df[~((df["prompt"] == faq.question) & (df["response"] == faq.answer))]
        if len(df) == len(filtered_df):
            raise HTTPException(status_code=404, detail="FAQ not found.")
        filtered_df.to_csv(faq_path, index=False, encoding="utf-8")
        global db
        db = load_faqs()
        return {"message": "FAQ deleted successfully."}
    except HTTPException:
        # Bug fix: the 404 used to be swallowed by the broad handler below
        # and re-raised as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def delete_faq_by_id(faq_id: str = Path(...)):
    """Delete the FAQ row with the given id, then reload the index.

    Args:
        faq_id: UUID string of the row to remove.

    Returns:
        dict: Success message including the deleted id.

    Raises:
        HTTPException: 404 if the id is absent, 500 on malformed CSV or
            any other failure.
    """
    try:
        df = pd.read_csv(faq_path, encoding="utf-8")
        if "id" not in df.columns:
            raise HTTPException(status_code=500, detail="CSV does not contain 'id' column.")
        filtered_df = df[df["id"] != faq_id]
        if len(filtered_df) == len(df):
            raise HTTPException(status_code=404, detail="FAQ with given ID not found.")
        filtered_df.to_csv(faq_path, index=False, encoding="utf-8")
        global db
        db = load_faqs()
        return {"message": f"FAQ with ID {faq_id} deleted successfully."}
    except HTTPException:
        # Bug fix: the deliberate 404/500 above used to be re-wrapped by the
        # generic handler below, losing the status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Delete All FAQs API
async def delete_all_faqs():
    """Truncate the FAQ CSV to its header row and reload the in-memory index."""
    global db
    try:
        empty = pd.DataFrame(columns=["id", "prompt", "response"])
        empty.to_csv(faq_path, index=False, encoding="utf-8")
        db = load_faqs()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"message": "All FAQs deleted successfully."}
# Show All FAQs API
async def get_faqs():
    """Return all FAQs as a list of records with 'question'/'answer' keys.

    Raises:
        HTTPException: 404 if the CSV is missing; 500 on parse, encoding,
            or any other failure.
    """
    try:
        frame = pd.read_csv(faq_path, encoding="utf-8").astype(str)
        renamed = frame.rename(columns={"prompt": "question", "response": "answer"})
        return renamed.to_dict(orient="records")
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="FAQ CSV file not found.")
    except pd.errors.ParserError as e:
        raise HTTPException(status_code=500, detail=f"CSV Parsing Error: {str(e)}")
    except UnicodeDecodeError as e:
        raise HTTPException(status_code=500, detail=f"Encoding Error: {str(e)}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Unexpected Error: {str(e)}")
# Retrain DB
async def retrain_db():
    """Rebuild the in-memory FAQ index from the CSV on disk."""
    global db
    try:
        db = load_faqs()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"message": "Chatbot retrained successfully."}
async def update_ircc_faqs():
    """Trigger a manual IRCC FAQ refresh and report what was added.

    Returns:
        dict: message, added_count, and the list of added entries
        (empty when the updater returns nothing).
    """
    try:
        # Normalize a None/empty result to an empty list so the response
        # shape is always the same.
        entries = manual_ircc_update_with_result() or []
        return {
            "message": "IRCC FAQs updated manually.",
            "added_count": len(entries),
            "entries": entries,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def search_ircc(request: QuestionRequest):
    """
    Search IRCC pages and return relevant page links (like IRCC search box)
    """
    try:
        term = request.query.strip()
        pages = search_ircc_pages(term)
        return {
            "query": term,
            "results_count": len(pages),
            "pages": pages,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def fetch_ircc_page(request: dict = Body(...)):
    """Fetch real-time content from a specific IRCC page URL.

    Args:
        request: JSON body expected to carry a "url" key pointing at
            https://www.canada.ca.

    Returns:
        The page data produced by fetch_specific_ircc_page.

    Raises:
        HTTPException: 400 for a non-IRCC URL, 404 when the fetch fails,
            500 on any other error.
    """
    try:
        # Bug fix: a present-but-None "url" value used to crash on .strip().
        url = (request.get("url") or "").strip()
        if not url.startswith("https://www.canada.ca"):
            raise HTTPException(status_code=400, detail="Invalid IRCC URL")
        page_data = fetch_specific_ircc_page(url)
        if not page_data:
            raise HTTPException(status_code=404, detail="Failed to fetch page")
        return page_data
    except HTTPException:
        # Bug fix: the 400/404 above used to be caught by the broad handler
        # below and re-raised as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def rebuild_ircc_data():
    """
    Force a full IRCC data rebuild from scratch.
    Use this monthly or when you notice conflicting information.
    """
    try:
        import ircc_updater

        # Clearing the cached content hash forces update_ircc_embeddings()
        # to treat the next run as a full rebuild.
        ircc_updater._last_content_hash = None
        entries = manual_ircc_update_with_result()
        count = len(entries) if entries else 0
        return {
            "message": "IRCC data rebuilt successfully",
            "status": "success",
            "ircc_pages_loaded": count,
            "note": "All old IRCC data removed, fresh data loaded",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))