s4um1l committed on
Commit
13317bf
·
1 Parent(s): 2730edc

updating app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -15
app.py CHANGED
@@ -40,17 +40,17 @@ HF_TOKEN = os.environ["HF_TOKEN"]
40
  3. Load HuggingFace Embeddings (remember to use the URL we set above)
41
  4. Index Files if they do not exist, otherwise load the vectorstore
42
  """
43
- ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
44
- ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
45
- text_loader =
46
- documents =
47
 
48
- ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
49
- text_splitter =
50
- split_documents =
51
 
52
- ### 3. LOAD HUGGINGFACE EMBEDDINGS
53
- hf_embeddings =
 
 
 
54
 
55
  async def add_documents_async(vectorstore, documents):
56
  await vectorstore.aadd_documents(documents)
@@ -109,18 +109,35 @@ hf_retriever = asyncio.run(run())
109
  1. Define a String Template
110
  2. Create a Prompt Template from the String Template
111
  """
112
- ### 1. DEFINE STRING TEMPLATE
113
- RAG_PROMPT_TEMPLATE =
 
114
 
115
- ### 2. CREATE PROMPT TEMPLATE
116
- rag_prompt =
 
 
 
 
 
 
 
 
 
117
 
118
  # -- GENERATION -- #
119
  """
120
  1. Create a HuggingFaceEndpoint for the LLM
121
  """
122
- ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
123
- hf_llm =
 
 
 
 
 
 
 
124
 
125
  @cl.author_rename
126
  def rename(original_author: str):
 
40
  3. Load HuggingFace Embeddings (remember to use the URL we set above)
41
  4. Index Files if they do not exist, otherwise load the vectorstore
42
  """
43
+ document_loader = TextLoader("./data/paul_graham_essays.txt")
44
+ documents = document_loader.load()
 
 
45
 
46
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
47
+ split_documents = text_splitter.split_documents(documents)
 
48
 
49
+ hf_embeddings = HuggingFaceEndpointEmbeddings(
50
+ model=HF_EMBED_ENDPOINT,
51
+ task="feature-extraction",
52
+ huggingfacehub_api_token=HF_TOKEN,
53
+ )
54
 
55
  async def add_documents_async(vectorstore, documents):
56
  await vectorstore.aadd_documents(documents)
 
109
  1. Define a String Template
110
  2. Create a Prompt Template from the String Template
111
  """
112
+ RAG_PROMPT_TEMPLATE = """\
113
+ <|start_header_id|>system<|end_header_id|>
114
+ You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
115
 
116
+ <|start_header_id|>user<|end_header_id|>
117
+ User Query:
118
+ {query}
119
+
120
+ Context:
121
+ {context}<|eot_id|>
122
+
123
+ <|start_header_id|>assistant<|end_header_id|>
124
+ """
125
+
126
+ rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
127
 
128
  # -- GENERATION -- #
129
  """
130
  1. Create a HuggingFaceEndpoint for the LLM
131
  """
132
+ hf_llm = HuggingFaceEndpoint(
133
+ endpoint_url=HF_LLM_ENDPOINT,
134
+ max_new_tokens=512,
135
+ top_k=10,
136
+ top_p=0.95,
137
+ temperature=0.3,
138
+ repetition_penalty=1.15,
139
+ huggingfacehub_api_token=HF_TOKEN,
140
+ )
141
 
142
  @cl.author_rename
143
  def rename(original_author: str):