Spaces:
Sleeping
Sleeping
DEBUG: FlashRank
Browse files
- functions.py +3 -7
functions.py
CHANGED
|
@@ -198,15 +198,11 @@ def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192")
|
|
| 198 |
vectorstore=vectorstore,
|
| 199 |
docstore=store,
|
| 200 |
child_splitter=RecursiveCharacterTextSplitter(),
|
| 201 |
-
search_kwargs={"k":
|
| 202 |
-
)
|
| 203 |
-
compressor = FlashrankRerank()
|
| 204 |
-
retriever = ContextualCompressionRetriever(
|
| 205 |
-
base_compressor=compressor, base_retriever=retriever
|
| 206 |
)
|
| 207 |
baseChain = (
|
| 208 |
{"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
|
| 209 |
-
"question":
|
| 210 |
| prompt
|
| 211 |
| ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
|
| 212 |
| StrOutputParser()
|
|
@@ -289,7 +285,7 @@ def getLinks(url: str, timeout=30):
|
|
| 289 |
def getTextFromImagePDF(pdfBytes):
|
| 290 |
def getText(image):
|
| 291 |
global reader
|
| 292 |
-
return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
|
| 293 |
allImages = convert_from_bytes(pdfBytes)
|
| 294 |
texts = [getText(image) for image in allImages]
|
| 295 |
return "\n\n\n".join(texts)
|
|
|
|
| 198 |
vectorstore=vectorstore,
|
| 199 |
docstore=store,
|
| 200 |
child_splitter=RecursiveCharacterTextSplitter(),
|
| 201 |
+
search_kwargs={"k": 4, "score_threshold": 0}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
)
|
| 203 |
baseChain = (
|
| 204 |
{"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
|
| 205 |
+
"question": RunnableLambda(lambda x: x["question"]), "chatHistory": RunnableLambda(lambda x: x["chatHistory"])}
|
| 206 |
| prompt
|
| 207 |
| ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
|
| 208 |
| StrOutputParser()
|
|
|
|
| 285 |
def getTextFromImagePDF(pdfBytes):
|
| 286 |
def getText(image):
|
| 287 |
global reader
|
| 288 |
+
return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True, x_ths = 0)])
|
| 289 |
allImages = convert_from_bytes(pdfBytes)
|
| 290 |
texts = [getText(image) for image in allImages]
|
| 291 |
return "\n\n\n".join(texts)
|