Spaces:
Sleeping
Sleeping
File size: 3,715 Bytes
5de9ee6 94c1837 5de9ee6 94c1837 5de9ee6 94c1837 5de9ee6 94c1837 5de9ee6 94c1837 b7caa49 94c1837 5de9ee6 94c1837 b720cc9 94c1837 b7caa49 94c1837 b7caa49 94c1837 b7caa49 b720cc9 b7caa49 94c1837 b7caa49 94c1837 b7caa49 94c1837 5de9ee6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
from fastapi import FastAPI, Form
from fastapi.responses import JSONResponse
from pathlib import Path
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from rag import build_vector_index, query_from_vector, PDF_DIR
from dotenv import load_dotenv
from utils import *
import threading
import os
import traceback
import logging
# ------------------ INIT ------------------
# Application object; the route decorators further down register handlers on it.
app = FastAPI(title="Virtual Assistant Chatbot API", version="1.0")
# ------------------ CORS ------------------
# Origins passed to CORSMiddleware as the allowed cross-origin callers.
origins = [
"https://chabot.demo.viproject.net",
# NOTE(review): this entry has no scheme. Browsers send the Origin header as
# scheme://host, so this value presumably never matches - confirm, then fix or drop.
"chabot.demo.viproject.net"
]
# All methods and headers are allowed, with credentials, for the origins above.
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Module-level logger; basicConfig sets up root logging at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Create the working directories at import time: PDF_DIR comes from the `rag`
# module, OUTPUT_FOLDER is the local "documents" directory (relative to the CWD).
os.makedirs(PDF_DIR, exist_ok=True)
OUTPUT_FOLDER = Path("documents")
OUTPUT_FOLDER.mkdir(exist_ok=True)
# ------------------ load env ------------------
# Load variables from a .env file (if present) before reading the API key.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): OPENAI_API_KEY is None when the variable is unset; init_openai is
# presumably provided by the `utils` star-import - verify how it handles None.
init_openai(OPENAI_API_KEY)
# ------------------ HELPER ------------------
def clear_documents_folder():
    """Delete every regular file located directly inside ``OUTPUT_FOLDER``.

    Sub-directories (and anything else that is not a regular file) are left
    untouched; the folder itself is not removed.
    """
    for entry in OUTPUT_FOLDER.iterdir():
        # Skip directories and other non-file entries.
        if not entry.is_file():
            continue
        entry.unlink()
# ------------------ ROUTES ------------------
@app.get("/")
def root():
    """Return a static status message showing that the API is reachable."""
    status_message = "Virtual Assistant API aktif dan siap digunakan!"
    return dict(message=status_message)
@app.post("/build-knowledge")
async def build_vector_db():
    """Build the FAISS vector database from all PDFs.

    Delegates the work to ``build_vector_index()`` and wraps its result in a
    JSON response.

    Returns:
        JSONResponse: the value returned by ``build_vector_index()`` on
        success. If anything raises, a response with HTTP status 500 whose
        body has the keys ``"error"`` and ``"traceback"``.
    """
    try:
        result = build_vector_index()
        return JSONResponse(result)
    except Exception as e:
        tb = traceback.format_exc()
        # Log under the real route name; logger.exception appends the
        # traceback itself, so it is not interpolated by hand.
        logger.exception("/build-knowledge error: %s", e)
        # NOTE(review): the traceback is still sent to the client to keep the
        # response shape unchanged; consider removing it in production since
        # it exposes internal details.
        return JSONResponse({"error": str(e), "traceback": tb}, status_code=500)
@app.post("/web-scraping/")
async def scrape_urls(filename: str = Form(...), urls: str = Form(...), location: str = "server"):
    """Fetch every URL, merge the text, clean it, and store it as one PDF.

    Args:
        filename: Base name (without extension) of the PDF written into
            ``OUTPUT_FOLDER``. Directory components are discarded.
        urls: Comma-separated list of URLs; blank entries are ignored.
        location: ``"local"`` selects ``init_driver_local()``; any other
            value selects ``init_driver()``.

    Returns:
        JSONResponse: ``{"success": True, "pdf_file": <path>}`` on success,
        with an extra ``"failed_urls"`` list when some pages could not be
        fetched. On failure ``{"success": False, "error": <message>}`` with
        status 400 (invalid input), 502 (no page could be fetched) or
        500 (any other error).
    """
    # Validate everything BEFORE touching the filesystem, so a bad request
    # cannot wipe the existing documents.
    url_list = [u.strip() for u in urls.split(",") if u.strip()]
    if not url_list:
        return JSONResponse(
            {"success": False, "error": "No valid URL provided"}, status_code=400
        )

    # Keep only the last path component so values such as "../../x" cannot
    # escape OUTPUT_FOLDER.
    safe_name = Path(filename.replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        return JSONResponse(
            {"success": False, "error": "Invalid filename"}, status_code=400
        )

    try:
        # Remove all previously generated files.
        clear_documents_folder()

        extracted_texts = [None] * len(url_list)
        # One slot per URL; each worker writes only to its own index.
        errors = [None] * len(url_list)

        # STEP 1: extract the pages, one thread (and one driver) per URL.
        def worker_extract(i, url):
            try:
                driver = init_driver_local() if location == "local" else init_driver()
                try:
                    extracted_texts[i] = fetch_page_text(driver, url)
                finally:
                    driver.quit()
            except Exception as exc:
                # An exception escaping a thread would be lost; record it so
                # the page can be skipped and reported to the caller.
                logger.exception("Failed to extract %s", url)
                errors[i] = str(exc) or repr(exc)

        threads = []
        for i, url in enumerate(url_list):
            t = threading.Thread(target=worker_extract, args=(i, url))
            t.start()
            threads.append(t)
        # NOTE(review): join() blocks the event loop while scraping runs.
        for t in threads:
            t.join()

        failed_urls = [url for url, err in zip(url_list, errors) if err is not None]
        if len(failed_urls) == len(url_list):
            return JSONResponse(
                {
                    "success": False,
                    "error": "None of the URLs could be fetched",
                    "failed_urls": failed_urls,
                },
                status_code=502,
            )

        # STEP 2: merge the text of every page that was fetched successfully.
        combined_text = "".join(
            f"===== URL: {url} =====\n{text}\n\n"
            for url, text, err in zip(url_list, extracted_texts, errors)
            if err is None
        )

        # STEP 3: clean the merged text with OpenAI.
        cleaned_text = clean_text_with_openai(combined_text)

        # STEP 4: save the PDF.
        output_file = OUTPUT_FOLDER / f"{safe_name}.pdf"
        save_to_pdf(cleaned_text, output_file)

        payload = {"success": True, "pdf_file": str(output_file)}
        if failed_urls:
            payload["failed_urls"] = failed_urls
        return JSONResponse(payload)
    except Exception as e:
        logger.exception("/web-scraping/ error: %s", e)
        return JSONResponse({"success": False, "error": str(e)}, status_code=500)
@app.post("/ask")
async def ask_question(
    question: str = Form(...),
    session_id: str = Form(None),
):
    """Ask a question against the indexed documents within a chat session.

    Args:
        question: The user's question, forwarded as ``query``.
        session_id: Optional chat-session identifier, forwarded unchanged
            (``None`` when the form field is omitted).

    Returns:
        JSONResponse: the value returned by ``query_from_vector()`` on
        success. If anything raises, a response with HTTP status 500 whose
        body has the keys ``"error"`` and ``"traceback"``.
    """
    try:
        result = query_from_vector(
            query=question,
            session_id=session_id,
        )
        return JSONResponse(result)
    except Exception as e:
        tb = traceback.format_exc()
        # Log the failure server-side as /build-knowledge does; previously
        # the error was only returned to the client.
        logger.exception("/ask error: %s", e)
        # NOTE(review): the traceback is still sent to the client to keep the
        # response shape unchanged; consider removing it in production.
        return JSONResponse({"error": str(e), "traceback": tb}, status_code=500)