# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install langchain-openai
# !pip install huggingface_hub
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv

"""Gradio chatbot that answers queries from the PDF files of a Hugging Face dataset.

The PDFs are loaded, split into chunks, embedded with Google GenAI embeddings
and stored in a Chroma vector store; each query is answered by an OpenAI chat
model through a LangChain ``ConversationalRetrievalChain``.
"""

import os
import sys
import threading

import gradio as gr
import requests  # noqa: F401  (kept from the original import list)
from huggingface_hub import HfFileSystem
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

sys.path.append('../..')

# --- Credentials -----------------------------------------------------------
# For Google Colab:
#     from google.colab import userdata
#     OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
#     hf_token = userdata.get('hf_token')
#     GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
#
# For Desktop:
#     from dotenv import load_dotenv, find_dotenv
#     _ = load_dotenv(find_dotenv())  # Read local .env file
#     OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
#     hf_token = os.environ['hf_token']
#     GEMINI_API_KEY = os.environ['GEMINI_API_KEY']

# For Hugging Face (values are None when the variable is not set)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')

llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"

# --- Source documents ------------------------------------------------------
# List every PDF of the dataset repository and turn each path into an
# "hf://" URL for the PDF loader.
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/*.pdf")
hf_file_paths = ["hf://" + file_path for file_path in file_paths]

# Directory in which Chroma persists the embedded chunks.
persist_directory = './chroma/'

# The vector store is expensive to build (PDF loading + embedding calls), so
# it is created once per process and shared by all queries.  The lock keeps
# concurrent first requests from building it twice.
_vectordb = None
_vectordb_lock = threading.Lock()


def _build_vectordb():
    """Load all PDFs, split them into chunks, embed them and return the Chroma store."""
    docs = []
    for file_path in hf_file_paths:
        docs.extend(PyPDFLoader(file_path).load())

    # Splitting Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Using Google GenAI Text Embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_document",
        google_api_key=GEMINI_API_KEY,
    )

    # Create Embeddings for Searching the Splits
    # NOTE(review): if ./chroma/ survives a restart, the chunks are presumably
    # added again to the existing collection -- confirm and clear if needed.
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    vectordb.persist()
    return vectordb


def _get_vectordb():
    """Return the shared Chroma store, building it on first use.

    If the build raises, nothing is cached and the next call tries again.
    """
    global _vectordb
    with _vectordb_lock:
        if _vectordb is None:
            _vectordb = _build_vectordb()
    return _vectordb


def chat_query(question):
    """Answer ``question`` using the indexed PDF files.

    Args:
        question: The query text entered by the user.

    Returns:
        The ``'answer'`` entry of the retrieval chain's result, or a short
        prompt message when ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        return "Please enter a query."

    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)

    # Memory: created per call, so each query starts with an empty chat
    # history and nothing is shared between requests.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Conversation Retrieval Chain
    retriever = _get_vectordb().as_retriever()
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

    result = qa.invoke({"question": question})
    return result['answer']


logo_path = os.path.join(os.getcwd(), "Logo.png")

iface = gr.Interface(
    fn=chat_query,
    inputs=gr.Textbox(lines=6, placeholder="Enter your Query here....", label="Query :"),
    outputs=gr.Textbox(label="Chatbot Reply : "),
    title=" -----: ChatBot :----- ",
    description=(
        "-- This Model can distinctively answer your Query using ChatGPT based on "
        "the Uploaded PDF Files (Multiple Files also supported). "
        "\n\n-- For precise reply, please input `Specific Keywords` in your Query, "
        "after uploading your files. "
        "\n\n-- Reply time is solely based on the File size. "
    ),
    concurrency_limit=None,
    thumbnail=logo_path,
)

iface.launch(share=True, debug=True)

# Sample query: What should be the GIB height outside the GIS hall ?