Spaces:
Runtime error
Create app.py
app.py ADDED
@@ -0,0 +1,113 @@
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import GoogleGenerativeAIEmbeddings

import gradio as gr
import os
import requests

import sys
sys.path.append('../..')

# For Google Colab
'''
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# For Desktop

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''

# For Hugging Face
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')

llm_name = "gpt-3.5-turbo"
hf_model = "sentence-transformers/all-MiniLM-L6-v2"

# List the PDF files stored in the Hugging Face dataset repo
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=fs_token)
file_paths = fs.glob("datasets/abhivsh/Model-TS/**.pdf")


def chat_query(question):

    import tempfile

    # Loop through the PDF files. The HfFileSystem paths are remote, so copy
    # each PDF to a local temp file first; PyPDFLoader expects a local path.
    loaders = []
    for file_path in file_paths:
        local_path = os.path.join(tempfile.gettempdir(), os.path.basename(file_path))
        with fs.open(file_path, "rb") as remote_pdf, open(local_path, "wb") as local_pdf:
            local_pdf.write(remote_pdf.read())
        loaders.append(PyPDFLoader(local_path))

    # Load every page of every PDF into one list of documents
    docs = []
    for loader in loaders:
        docs.extend(loader.load())

    # Splitting Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Using Google GenAI Text Embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_document", google_api_key=GEMINI_API_KEY)

    # Create Embeddings for Searching the Splits
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
    vectordb.persist()
    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)

    # Memory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Conversational Retrieval Chain
    retriever = vectordb.as_retriever()
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

    # The Gradio textbox value takes the place of input()
    result = qa({"question": question})
    return result['answer']


logo_path = os.path.join(os.getcwd(), "Logo.png")

iface = gr.Interface(
    fn=chat_query,
    inputs=gr.Textbox(lines=6, placeholder="Enter your Query here....", label="Query :"),
    outputs=gr.Textbox(label="Chatbot Reply : "),
    title=" -----: ChatBot :----- ",
    description="""-- This model answers your Query using ChatGPT, based on the uploaded PDF files (multiple files are also supported).
    \n\n-- For a precise reply, please include `Specific Keywords` in your Query after uploading your files. \
    \n\n-- Reply time depends on the file size. """,
    concurrency_limit=None,
    thumbnail=logo_path,
)


iface.launch(share=True, debug=True)

# Sample query: What should be the GIB height outside the GIS hall?