shakeel143 committed on
Commit
7eef49a
·
verified ·
1 Parent(s): c0e34f8

Create chatbot_service.py

Browse files
Files changed (1) hide show
  1. chatbot_service.py +76 -0
chatbot_service.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/chatbot_service.py
2
+
3
+ import os
4
+ import pickle
5
+ from typing import List
6
+ from fastapi import HTTPException, UploadFile, File
7
+ from PyPDF2 import PdfReader
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from app.services.drive_service import DriveService
12
+ from app.config import CUSTOM_CHATBOTS_DIR
13
+
14
class ChatbotService:
    """Build a FAISS index from uploaded PDFs and upload it to Google Drive."""

    def __init__(self):
        self.drive_service = DriveService()

    @staticmethod
    def _extract_pdf_text(pdf_files: List[UploadFile]) -> str:
        """Return the text of every page of every PDF, concatenated in order.

        Pages for which ``extract_text()`` yields nothing contribute an empty
        string. No separator is inserted between pages or files.
        """
        page_texts = []
        for pdf_file in pdf_files:
            pdf_reader = PdfReader(pdf_file.file)
            page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
        # Single join instead of repeated `+=` to avoid quadratic string building.
        return "".join(page_texts)

    def _get_or_create_drive_folder(self, name: str, parent_id):
        """Return the Drive folder identifier for `name` under `parent_id`.

        Looks the folder up through the drive service first and only creates
        it when the lookup returns a falsy value.
        """
        folder_id = self.drive_service.find_folder_by_name(name, parent_id)
        if not folder_id:
            folder_id = self.drive_service.create_folder_in_google_drive(name, parent_id)
        return folder_id

    def create_chatbot(self, folder_name: str, pdf_files: List[UploadFile]):
        """Create a new chatbot using only PDF files.

        Extracts the text of the uploaded PDFs, splits it into chunks, embeds
        the chunks into a FAISS vector store saved under
        ``CUSTOM_CHATBOTS_DIR/<folder_name>/faiss_index``, and uploads
        ``index.faiss`` and ``index.pkl`` to a ``<folder_name>/faiss_index``
        folder on Google Drive.

        Args:
            folder_name: Name of the chatbot; used as a single directory name
                locally and as a folder name on Google Drive.
            pdf_files: Uploaded PDF files to index.

        Returns:
            A dict with ``"status"`` and ``"message"`` keys on success.

        Raises:
            HTTPException: 400 when no PDF is supplied, when `folder_name` is
                not a plain directory name, or when no text can be extracted;
                500 for any other failure.
        """
        # Function-scope import: the module does not define a logger, and the
        # original error handler referenced an undefined `logger`.
        import logging

        logger = logging.getLogger(__name__)

        try:
            # Validate input
            if not pdf_files:
                raise HTTPException(status_code=400, detail="At least one PDF file is required.")

            # `folder_name` is joined into a filesystem path below; reject
            # anything that could escape CUSTOM_CHATBOTS_DIR.
            if (
                not folder_name
                or folder_name in (".", "..")
                or "/" in folder_name
                or "\\" in folder_name
            ):
                raise HTTPException(status_code=400, detail="Invalid chatbot folder name.")

            # The original code used GOOGLE_DRIVE_FOLDER_ID without importing it.
            # NOTE(review): assumed to be defined in app.config next to
            # CUSTOM_CHATBOTS_DIR - confirm. Imported here, inside the try, so a
            # missing setting fails fast as a 500 before any embedding work.
            from app.config import GOOGLE_DRIVE_FOLDER_ID

            # Process PDFs
            raw_text = self._extract_pdf_text(pdf_files)

            # Split text into chunks
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
            text_chunks = text_splitter.split_text(raw_text)
            if not text_chunks:
                # Scanned/image-only PDFs yield no text; report a client error
                # instead of letting the vector store fail on an empty list.
                raise HTTPException(
                    status_code=400,
                    detail="No extractable text found in the provided PDF files.",
                )

            # Create vector store
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
            vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)

            # Create folder for the chatbot, then save the vector store and documents
            folder_path = os.path.join(CUSTOM_CHATBOTS_DIR, folder_name)
            os.makedirs(folder_path, exist_ok=True)
            faiss_dir = os.path.join(folder_path, "faiss_index")
            vector_store.save_local(faiss_dir)

            documents_path = os.path.join(folder_path, "documents.pkl")
            with open(documents_path, "wb") as f:
                pickle.dump(text_chunks, f)

            # Upload files to Google Drive
            # Step 1: Create or find the folder with the provided `folder_name`
            chatbot_folder_id = self._get_or_create_drive_folder(folder_name, GOOGLE_DRIVE_FOLDER_ID)

            # Step 2: Create or find the `faiss_index` subfolder inside it
            faiss_index_folder_id = self._get_or_create_drive_folder("faiss_index", chatbot_folder_id)

            # Step 3: Upload index.faiss and index.pkl to the `faiss_index` subfolder
            # NOTE(review): this replaces the index.pkl written by save_local()
            # with the pickled list of text chunks, as the original code did.
            # Confirm the consumer expects that file layout.
            # os.replace overwrites an existing destination on every platform,
            # whereas os.rename raises on Windows when the target exists.
            index_pkl_path = os.path.join(faiss_dir, "index.pkl")
            os.replace(documents_path, index_pkl_path)

            faiss_index_path = os.path.join(faiss_dir, "index.faiss")
            self.drive_service.upload_file_to_google_drive(faiss_index_path, faiss_index_folder_id)
            self.drive_service.upload_file_to_google_drive(index_pkl_path, faiss_index_folder_id)

            return {
                "status": "success",
                "message": f"Chatbot '{folder_name}' created and files uploaded to Google Drive successfully!"
            }
        except HTTPException:
            # Deliberate client/server errors raised above pass through unchanged.
            raise
        except Exception as e:
            # logger.exception records the traceback alongside the message.
            logger.exception("Error creating chatbot: %s", e)
            raise HTTPException(status_code=500, detail=f"Failed to create chatbot: {str(e)}") from e