abhivsh committed on
Commit
9254348
·
verified ·
1 Parent(s): a193b2f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # !pip install langchain
2
+ # !pip install langchain_community
3
+ # !pip install langchain_text_splitters
4
+ # !pip install langchain-google-genai
5
+ # !pip install gradio
6
+ # !pip install openai
7
+ # !pip install pypdf
8
+ # !pip install chromadb
9
+ # !pip install tiktoken
10
+ # !pip install python-dotenv
11
+
12
+ from langchain_community.document_loaders import PyPDFLoader
13
+ from langchain_community.vectorstores import Chroma
14
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
15
+ from langchain_openai import ChatOpenAI
16
+ from langchain.memory import ConversationBufferMemory
17
+ from langchain.chains import ConversationalRetrievalChain
18
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
19
+
20
+ import gradio as gr
21
+ import os
22
+ import requests
23
+
24
+ import sys
25
+ sys.path.append('../..')
26
+
27
+ # For Google Colab
28
+ '''
29
+ from google.colab import userdata
30
+ OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
31
+ hf_token = userdata.get('hf_token')
32
+ GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
33
+
34
+ # For Desktop
35
+
36
+ from dotenv import load_dotenv, find_dotenv
37
+ _ = load_dotenv(find_dotenv()) # Read local .env file
38
+ OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
39
+ hf_token = os.environ['hf_token']
40
+ GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
41
+ '''
42
+
43
+ # For Hugging Face
44
+ OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
45
+ hf_token = os.environ.get('hf_token')
46
+ GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
47
+ fs_token = os.environ.get('fs_token')
48
+
49
+ llm_name = "gpt-3.5-turbo"
50
+ hf_model = "sentence-transformers/all-MiniLM-L6-v2"
51
+
52
+ from huggingface_hub import HfFileSystem
53
+ fs = HfFileSystem(token=fs_token)
54
+ file_paths = fs.glob("datasets/abhivsh/Model-TS/**.pdf")
55
+
56
+
57
# Retriever over the indexed PDF chunks. Built lazily on the first real query
# and then reused, so the PDFs are not re-loaded and re-embedded per request.
_retriever_cache = None


def _build_retriever():
    """Load every PDF in ``file_paths``, index the chunks and return a retriever.

    The documents are split into 1500-character chunks with a 150-character
    overlap, embedded with Google's ``models/embedding-001`` model and stored
    in a Chroma collection persisted under ``./chroma/``.
    """
    # NOTE(review): file_paths comes from HfFileSystem.glob(); confirm that
    # PyPDFLoader can open these paths directly in the deployment environment.
    docs = []
    for file_path in file_paths:
        docs.extend(PyPDFLoader(file_path).load())

    # Splitting documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Using Google GenAI text embeddings
    embedding_model = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_document",
        google_api_key=GEMINI_API_KEY,
    )

    # Create embeddings for searching the splits
    persist_directory = './chroma/'
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    vectordb.persist()
    return vectordb.as_retriever()


def chat_query(question):
    """Answer ``question`` from the indexed PDF files.

    Args:
        question: The user's query text, as supplied by the Gradio textbox.

    Returns:
        The answer string produced by the conversational retrieval chain, or
        a short prompt asking for input when ``question`` is empty or blank.
    """
    global _retriever_cache

    # Do not spend embedding / LLM calls on an empty query.
    if not question or not question.strip():
        return "Please enter a query."

    # Build the index once per process. Previously every query rebuilt it and
    # appended the same chunks to the persisted collection again.
    # Concurrent first requests may each build the index once; after that the
    # cached retriever is reused.
    if _retriever_cache is None:
        _retriever_cache = _build_retriever()

    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)

    # Memory is created per call, so each query is answered independently and
    # chat history is never shared between users of the interface.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Conversational retrieval chain
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=_retriever_cache, memory=memory)

    # invoke() replaces the deprecated direct call on the chain object.
    result = qa.invoke({"question": question})
    return result['answer']
94
+
95
+
96
+ logo_path = os.path.join(os.getcwd(), "Logo.png")
97
+
98
+ iface = gr.Interface(
99
+ fn=chat_query,
100
+ inputs= gr.Textbox(lines = 6, placeholder="Enter your Query here....",label="Query :"),
101
+ outputs=gr.Textbox(label="Chatbot Reply : "),
102
+ title = " -----: ChatBot :----- ",
103
+ description="""-- This Model can distinctively answer your Query using ChatGPT based on the Uploaded PDF Files (Multiple Files also supported).
104
+ \n\n-- For precise reply, please input `Specific Keywords` in your Query, after uploading your files. \
105
+ \n\n-- Reply time is solely based on the File size. """,
106
+ concurrency_limit = None,
107
+ thumbnail = logo_path,
108
+ )
109
+
110
+
111
+ iface.launch(share=True, debug=True)
112
+
113
+ # What should be the GIB height outside the GIS hall ?