Spaces:
Sleeping
Sleeping
updating app.py
Browse files
app.py
CHANGED
|
@@ -40,17 +40,17 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
| 40 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
| 41 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
| 42 |
"""
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
text_loader =
|
| 46 |
-
documents =
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
split_documents =
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
async def add_documents_async(vectorstore, documents):
|
| 56 |
await vectorstore.aadd_documents(documents)
|
|
@@ -109,18 +109,35 @@ hf_retriever = asyncio.run(run())
|
|
| 109 |
1. Define a String Template
|
| 110 |
2. Create a Prompt Template from the String Template
|
| 111 |
"""
|
| 112 |
-
|
| 113 |
-
|
|
|
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# -- GENERATION -- #
|
| 119 |
"""
|
| 120 |
1. Create a HuggingFaceEndpoint for the LLM
|
| 121 |
"""
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
@cl.author_rename
|
| 126 |
def rename(original_author: str):
|
|
|
|
| 40 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
| 41 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
| 42 |
"""
|
| 43 |
+
document_loader = TextLoader("./data/paul_graham_essays.txt")
|
| 44 |
+
documents = document_loader.load()
|
|
|
|
|
|
|
| 45 |
|
| 46 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
|
| 47 |
+
split_documents = text_splitter.split_documents(documents)
|
|
|
|
| 48 |
|
| 49 |
+
hf_embeddings = HuggingFaceEndpointEmbeddings(
|
| 50 |
+
model=HF_EMBED_ENDPOINT,
|
| 51 |
+
task="feature-extraction",
|
| 52 |
+
huggingfacehub_api_token=HF_TOKEN,
|
| 53 |
+
)
|
| 54 |
|
| 55 |
async def add_documents_async(vectorstore, documents):
|
| 56 |
await vectorstore.aadd_documents(documents)
|
|
|
|
| 109 |
1. Define a String Template
|
| 110 |
2. Create a Prompt Template from the String Template
|
| 111 |
"""
|
| 112 |
+
RAG_PROMPT_TEMPLATE = """\
|
| 113 |
+
<|start_header_id|>system<|end_header_id|>
|
| 114 |
+
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
|
| 115 |
|
| 116 |
+
<|start_header_id|>user<|end_header_id|>
|
| 117 |
+
User Query:
|
| 118 |
+
{query}
|
| 119 |
+
|
| 120 |
+
Context:
|
| 121 |
+
{context}<|eot_id|>
|
| 122 |
+
|
| 123 |
+
<|start_header_id|>assistant<|end_header_id|>
|
| 124 |
+
"""
|
| 125 |
+
|
| 126 |
+
rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
|
| 127 |
|
| 128 |
# -- GENERATION -- #
|
| 129 |
"""
|
| 130 |
1. Create a HuggingFaceEndpoint for the LLM
|
| 131 |
"""
|
| 132 |
+
hf_llm = HuggingFaceEndpoint(
|
| 133 |
+
endpoint_url=HF_LLM_ENDPOINT,
|
| 134 |
+
max_new_tokens=512,
|
| 135 |
+
top_k=10,
|
| 136 |
+
top_p=0.95,
|
| 137 |
+
temperature=0.3,
|
| 138 |
+
repetition_penalty=1.15,
|
| 139 |
+
huggingfacehub_api_token=HF_TOKEN,
|
| 140 |
+
)
|
| 141 |
|
| 142 |
@cl.author_rename
|
| 143 |
def rename(original_author: str):
|