Spaces:
Sleeping
Sleeping
app20
Browse files
app.py
CHANGED
|
@@ -51,8 +51,8 @@ loader = PyPDFDirectoryLoader('pdfs')
|
|
| 51 |
data=loader.load()
|
| 52 |
# split documents
|
| 53 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 54 |
-
chunk_size=
|
| 55 |
-
chunk_overlap=
|
| 56 |
length_function=len
|
| 57 |
)
|
| 58 |
docs = text_splitter.split_documents(data)
|
|
@@ -69,7 +69,7 @@ vectordb = Chroma.from_documents(
|
|
| 69 |
persist_directory=persist_directory
|
| 70 |
)
|
| 71 |
# define retriever
|
| 72 |
-
retriever = vectordb.as_retriever(search_kwargs={"k":
|
| 73 |
|
| 74 |
class FinalAnswer(BaseModel):
|
| 75 |
question: str = Field(description="the original question")
|
|
@@ -81,7 +81,10 @@ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
|
|
| 81 |
template = """
|
| 82 |
Your name is AngryGreta and you are a recycling chatbot with the objective to answer questions from user in English or Spanish /
|
| 83 |
Use the following pieces of context to answer the question /
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
Context: {context}
|
| 86 |
User: {question}
|
| 87 |
{format_instructions}
|
|
@@ -99,10 +102,11 @@ llm = HuggingFaceHub(
|
|
| 99 |
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 100 |
task="text-generation",
|
| 101 |
model_kwargs={
|
| 102 |
-
"max_new_tokens":
|
| 103 |
"top_k": 30,
|
| 104 |
"temperature": 0.1,
|
| 105 |
"repetition_penalty": 1.03,
|
|
|
|
| 106 |
},
|
| 107 |
)
|
| 108 |
|
|
|
|
| 51 |
data=loader.load()
|
| 52 |
# split documents
|
| 53 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 54 |
+
chunk_size=1024,
|
| 55 |
+
chunk_overlap=150,
|
| 56 |
length_function=len
|
| 57 |
)
|
| 58 |
docs = text_splitter.split_documents(data)
|
|
|
|
| 69 |
persist_directory=persist_directory
|
| 70 |
)
|
| 71 |
# define retriever
|
| 72 |
+
retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
|
| 73 |
|
| 74 |
class FinalAnswer(BaseModel):
|
| 75 |
question: str = Field(description="the original question")
|
|
|
|
| 81 |
template = """
|
| 82 |
Your name is AngryGreta and you are a recycling chatbot with the objective to answer questions from user in English or Spanish /
|
| 83 |
Use the following pieces of context to answer the question /
|
| 84 |
+
If the question is in English, answer in English /
|
| 85 |
+
If the question is in Spanish, answer in Spanish /
|
| 86 |
+
Do not mention the word context when you answer a question, use the word database instead /
|
| 87 |
+
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short
|
| 88 |
Context: {context}
|
| 89 |
User: {question}
|
| 90 |
{format_instructions}
|
|
|
|
| 102 |
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 103 |
task="text-generation",
|
| 104 |
model_kwargs={
|
| 105 |
+
"max_new_tokens": 2000,
|
| 106 |
"top_k": 30,
|
| 107 |
"temperature": 0.1,
|
| 108 |
"repetition_penalty": 1.03,
|
| 109 |
+
"early_stopping": "never"
|
| 110 |
},
|
| 111 |
)
|
| 112 |
|