lsy9874205 committed on
Commit
cc4dc4b
·
1 Parent(s): 027886d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py CHANGED
@@ -15,8 +15,18 @@ from tqdm.asyncio import tqdm_asyncio
15
  import asyncio
16
  from tqdm.asyncio import tqdm
17
 
 
 
 
 
 
 
 
18
  load_dotenv()
19
 
 
 
 
20
  HF_LLM_ENDPOINT = os.getenv("HF_LLM_ENDPOINT")
21
  HF_EMBED_ENDPOINT = os.getenv("HF_EMBED_ENDPOINT")
22
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -24,6 +34,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
24
  if not all([HF_LLM_ENDPOINT, HF_EMBED_ENDPOINT, HF_TOKEN]):
25
  raise ValueError("Missing required environment variables. Please check your .env file.")
26
 
 
 
 
 
 
 
 
 
 
27
  document_loader = TextLoader("./data/paul_graham_essays.txt")
28
  documents = document_loader.load()
29
 
@@ -88,6 +107,11 @@ async def run():
88
 
89
  hf_retriever = asyncio.run(run())
90
 
 
 
 
 
 
91
  RAG_PROMPT_TEMPLATE = """\
92
  <|start_header_id|>system<|end_header_id|>
93
  You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
@@ -104,6 +128,10 @@ Context:
104
 
105
  rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
106
 
 
 
 
 
107
  hf_llm = HuggingFaceEndpoint(
108
  endpoint_url=HF_LLM_ENDPOINT,
109
  max_new_tokens=512,
@@ -116,6 +144,11 @@ hf_llm = HuggingFaceEndpoint(
116
 
117
  @cl.author_rename
118
  def rename(original_author: str):
 
 
 
 
 
119
  rename_dict = {
120
  "Assistant" : "Paul Graham Essay Bot"
121
  }
@@ -123,6 +156,13 @@ def rename(original_author: str):
123
 
124
  @cl.on_chat_start
125
  async def start_chat():
 
 
 
 
 
 
 
126
 
127
  lcel_rag_chain = (
128
  {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
@@ -133,6 +173,13 @@ async def start_chat():
133
 
134
  @cl.on_message
135
  async def main(message: cl.Message):
 
 
 
 
 
 
 
136
  lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
137
 
138
  msg = cl.Message(content="")
 
15
  import asyncio
16
  from tqdm.asyncio import tqdm
17
 
18
+ # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
19
+ # ---- ENV VARIABLES ---- #
20
+ """
21
+ This function will load our environment file (.env) if it is present.
22
+
23
+ NOTE: Make sure that .env is in your .gitignore file - it is by default, but please ensure it remains there.
24
+ """
25
  load_dotenv()
26
 
27
+ """
28
+ We will load our environment variables here.
29
+ """
30
  HF_LLM_ENDPOINT = os.getenv("HF_LLM_ENDPOINT")
31
  HF_EMBED_ENDPOINT = os.getenv("HF_EMBED_ENDPOINT")
32
  HF_TOKEN = os.getenv("HF_TOKEN")
 
34
  if not all([HF_LLM_ENDPOINT, HF_EMBED_ENDPOINT, HF_TOKEN]):
35
  raise ValueError("Missing required environment variables. Please check your .env file.")
36
 
37
+ # ---- GLOBAL DECLARATIONS ---- #
38
+
39
+ # -- RETRIEVAL -- #
40
+ """
41
+ 1. Load Documents from Text File
42
+ 2. Split Documents into Chunks
43
+ 3. Load HuggingFace Embeddings (remember to use the URL we set above)
44
+ 4. Index Files if they do not exist, otherwise load the vectorstore
45
+ """
46
  document_loader = TextLoader("./data/paul_graham_essays.txt")
47
  documents = document_loader.load()
48
 
 
107
 
108
  hf_retriever = asyncio.run(run())
109
 
110
+ # -- AUGMENTED -- #
111
+ """
112
+ 1. Define a String Template
113
+ 2. Create a Prompt Template from the String Template
114
+ """
115
  RAG_PROMPT_TEMPLATE = """\
116
  <|start_header_id|>system<|end_header_id|>
117
  You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
 
128
 
129
  rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
130
 
131
+ # -- GENERATION -- #
132
+ """
133
+ 1. Create a HuggingFaceEndpoint for the LLM
134
+ """
135
  hf_llm = HuggingFaceEndpoint(
136
  endpoint_url=HF_LLM_ENDPOINT,
137
  max_new_tokens=512,
 
144
 
145
  @cl.author_rename
146
  def rename(original_author: str):
147
+ """
148
+ This function can be used to rename the 'author' of a message.
149
+
150
+ In this case, we're overriding the 'Assistant' author to be 'Paul Graham Essay Bot'.
151
+ """
152
  rename_dict = {
153
  "Assistant" : "Paul Graham Essay Bot"
154
  }
 
156
 
157
  @cl.on_chat_start
158
  async def start_chat():
159
+ """
160
+ This function will be called at the start of every user session.
161
+
162
+ We will build our LCEL RAG chain here, and store it in the user session.
163
+
164
+ The user session is a dictionary that is unique to each chat session and is stored in the server's memory.
165
+ """
166
 
167
  lcel_rag_chain = (
168
  {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
 
173
 
174
  @cl.on_message
175
  async def main(message: cl.Message):
176
+ """
177
+ This function will be called every time a message is received from a session.
178
+
179
+ We will use the LCEL RAG chain to generate a response to the user query.
180
+
181
+ The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
182
+ """
183
  lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
184
 
185
  msg = cl.Message(content="")