# NOTE: the original paste carried Hugging Face Spaces page artifacts
# ("Spaces: / Sleeping / Sleeping") here; they are not part of the program.
"""Gradio document-QA app: load documents, embed them with Google
Generative AI embeddings, index them in FAISS, and answer chat queries
through a LangChain QA agent built in ``src.agent``."""

import os

import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import (
    Docx2txtLoader,
    PyPDFLoader,
    TextLoader,
)
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from src.agent import build_qa_chain

# Read GOOGLE_API_KEY from a local .env file and configure the Gemini SDK
# once, at import time, before any embedding/chat call is made.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
class AgentChain:
    """Mutable holder for the app-wide QA agent and FAISS vector store.

    Both attributes start as ``None`` and are assigned later: the agent
    right after module setup, the vector store once a document has been
    processed by the upload callback.
    """

    def __init__(self):
        # agent: the LangChain QA chain; db: the FAISS store (or None
        # until a document has been indexed).
        self.agent = self.db = None
# Module-level singleton shared by the Gradio callbacks below: the QA agent
# is built once at startup, while `db` is filled in after a file upload.
agent_chain = AgentChain()
agent_chain.agent = build_qa_chain()
def extract_text_from_files(docs):
    """Load every supported document found in the directory *docs*.

    Supported extensions are .pdf, .docx/.doc, and .txt (now matched
    case-insensitively); any other file is silently skipped.

    Args:
        docs: Path to a directory to scan (non-recursive).

    Returns:
        list: LangChain ``Document`` objects from all loaders, or the
        string ``"Directory is empty"`` when the directory has no entries.

    Bug fixes vs. the original: the ``docs.split("/")`` /
    ``"/".join(...)`` round-trip was a no-op and has been removed.
    """
    files = os.listdir(docs)
    # NOTE(review): returning a str here while returning a list below is
    # an inconsistent contract; preserved so existing callers keep working.
    if not files:
        return "Directory is empty"

    # Extension -> loader class dispatch replaces the repeated if/elif ladder.
    loaders = {
        ".pdf": PyPDFLoader,
        ".docx": Docx2txtLoader,
        ".doc": Docx2txtLoader,
        ".txt": TextLoader,
    }
    documents = []
    for name in files:
        ext = os.path.splitext(name)[1].lower()
        loader_cls = loaders.get(ext)
        if loader_cls is not None:
            documents.extend(loader_cls(os.path.join(docs, name)).load())
    return documents
def extract_text_from_file(file):
    """Load a single uploaded file into LangChain documents.

    Args:
        file: Path to the file (Gradio passes a temp-file path; may be a
            path-like object, hence the ``str()`` normalization below).
            Supported extensions: .pdf, .docx/.doc, .txt.

    Returns:
        list: Loaded ``Document`` objects; empty for unsupported extensions.

    Bug fix vs. the original: the .doc branch tested
    ``file.endswith('.doc')`` on the raw object instead of the
    stringified ``filename``, which crashes for non-str path objects.
    """
    documents = []
    filename = str(file)
    if filename.endswith(".pdf"):
        loader = PyPDFLoader(file)
        documents.extend(loader.load())
    elif filename.endswith((".docx", ".doc")):
        loader = Docx2txtLoader(file)
        documents.extend(loader.load())
    elif filename.endswith(".txt"):
        loader = TextLoader(file)
        documents.extend(loader.load())
    print("Text extracted")
    return documents
def get_text_chunks(text):
    """Split loaded documents into large overlapping chunks for embedding.

    Args:
        text: Iterable of LangChain ``Document`` objects.

    Returns:
        list: Chunked ``Document`` objects (10k chars, 1k overlap).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    split_docs = splitter.split_documents(text)
    print("Chunks splitted")
    return split_docs
def save_in_faiss(text_chunks, save=False):
    """Embed document chunks and index them in an in-memory FAISS store.

    Args:
        text_chunks: Chunked ``Document`` objects to embed.
        save: When true, also persist the index to ./faiss_index.

    Returns:
        The populated FAISS vector store.
    """
    store = FAISS.from_documents(
        text_chunks,
        embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )
    if save:
        store.save_local("faiss_index")
    print("Document search created")
    return store
def process_files(file):
    """Gradio upload callback: index *file* and stash the vector store.

    Loads the file, chunks it, builds a FAISS index, and publishes the
    index on the module-level ``agent_chain`` for answer_query to use.
    Returns the file unchanged so the Gradio component keeps displaying it.
    """
    chunks = get_text_chunks(extract_text_from_file(file))
    agent_chain.db = save_in_faiss(chunks)
    gr.Info("Processing completed")
    return file
def answer_query(message, history):
    """Gradio chat callback: answer *message* using retrieved context.

    Args:
        message: The user's question.
        history: Chat history supplied by Gradio (unused here; the agent
            keeps its own memory).

    Returns:
        str: The agent's answer text.

    Bug fix vs. the original: ``docs = []`` was assigned AFTER the
    similarity search, unconditionally discarding every retrieved
    document (and leaving ``docs`` undefined when no index existed).
    """
    docs = []
    if agent_chain.db is not None:
        docs = agent_chain.db.similarity_search(message)
    response = agent_chain.agent(
        {"input_documents": docs, "human_input": message},
        return_only_outputs=True,
    )
    return response['output_text']