from fastapi import FastAPI, Form
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from rag import build_vector_index, query_from_vector, PDF_DIR
from dotenv import load_dotenv
from utils import *
import threading
import os
import traceback
import logging
# ------------------ INIT ------------------
app = FastAPI(title="Virtual Assistant Chatbot API", version="1.0")

# ------------------ CORS ------------------
origins = [
    "https://chabot.demo.viproject.net",
    "chabot.demo.viproject.net",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

os.makedirs(PDF_DIR, exist_ok=True)
OUTPUT_FOLDER = Path("documents")
OUTPUT_FOLDER.mkdir(exist_ok=True)

# ------------------ load env ------------------
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
init_openai(OPENAI_API_KEY)
# ------------------ HELPER ------------------
def clear_documents_folder():
    """Delete all files in the documents folder."""
    for file in OUTPUT_FOLDER.iterdir():
        if file.is_file():
            file.unlink()
# ------------------ ROUTES ------------------
@app.get("/")
def root():
    """Check API status."""
    return {"message": "Virtual Assistant API is active and ready to use!"}
@app.post("/build_vector_db")
async def build_vector_db():
    """Build the FAISS vector database from all PDFs."""
    try:
        result = build_vector_index()
        return JSONResponse(result)
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(f"/build_vector_db error: {e}\n{tb}")
        return JSONResponse({"error": str(e), "traceback": tb}, status_code=500)
@app.post("/scrape_urls")
async def scrape_urls(filename: str = Form(...), urls: str = Form(...), location: str = "server"):
    """
    Fetch every URL, combine the text → clean it → produce a single complete PDF.
    """
    # Delete all old files
    clear_documents_folder()

    # Parse the comma-separated URL list
    url_list = [u.strip() for u in urls.split(",") if u.strip()]
    extracted_texts = [None] * len(url_list)
    threads = []

    # STEP 1: Extract each page in its own thread, one WebDriver per URL
    def worker_extract(i, url):
        driver = init_driver_local() if location == "local" else init_driver()
        try:
            extracted_texts[i] = fetch_page_text(driver, url)
        finally:
            driver.quit()

    for i, url in enumerate(url_list):
        t = threading.Thread(target=worker_extract, args=(i, url))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    # STEP 2: Combine all extracted text
    combined_text = ""
    for url, text in zip(url_list, extracted_texts):
        combined_text += f"===== URL: {url} =====\n{text}\n\n"

    # STEP 3: Clean the combined text with OpenAI
    cleaned_text = clean_text_with_openai(combined_text)

    # STEP 4: Save as PDF
    output_file = OUTPUT_FOLDER / f"{filename}.pdf"
    save_to_pdf(cleaned_text, output_file)

    return JSONResponse({"success": True, "pdf_file": str(output_file)})
@app.post("/ask_question")
async def ask_question(
    question: str = Form(...),
    session_id: str = Form(None),
):
    """Ask a question against the indexed documents, with chat-session support."""
    try:
        result = query_from_vector(
            query=question,
            session_id=session_id,
        )
        return JSONResponse(result)
    except Exception as e:
        tb = traceback.format_exc()
        return JSONResponse({"error": str(e), "traceback": tb}, status_code=500)
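
# ------------------ RUN (local dev sketch) ------------------
# Minimal sketch of a local entry point; not part of the original file.
# Assumes uvicorn is installed; the host/port values are illustrative only.
# Example calls once the server is running (paths match the route decorators above,
# which are themselves inferred for scrape_urls and ask_question):
#   curl -X POST -F "filename=demo" -F "urls=https://example.com" http://localhost:8000/scrape_urls
#   curl -X POST http://localhost:8000/build_vector_db
#   curl -X POST -F "question=What is this document about?" http://localhost:8000/ask_question
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)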