Update utils.py
Browse files
utils.py
CHANGED
|
@@ -320,18 +320,20 @@ def rag_chain(llm, prompt, retriever):
|
|
| 320 |
relevant_docs=[]
|
| 321 |
most_relevant_docs=[]
|
| 322 |
relevant_docs = retriever.get_relevant_documents(prompt)
|
|
|
|
|
|
|
| 323 |
|
| 324 |
print("releant docs1......................")
|
| 325 |
-
if (len(
|
| 326 |
print("releant docs2......................")
|
| 327 |
-
print(
|
| 328 |
#llm_chain = LLMChain(llm = llm, prompt = RAG_CHAIN_PROMPT)
|
| 329 |
#result = llm_chain.run({"context": relevant_docs, "question": prompt})
|
| 330 |
# Erstelle ein PromptTemplate mit Platzhaltern für Kontext und Frage
|
| 331 |
#RAG_CHAIN_PROMPT = PromptTemplate(template="Context: {context}\n\nQuestion: {question}\n\nAnswer:")
|
| 332 |
|
| 333 |
# Inhalte der relevanten Dokumente abrufen
|
| 334 |
-
doc_contents = [doc["content"] for doc in
|
| 335 |
|
| 336 |
#Berechne die Ähnlichkeiten und finde das relevanteste Dokument
|
| 337 |
question_embedding = embedder_modell.encode(prompt, convert_to_tensor=True)
|
|
@@ -340,7 +342,7 @@ def rag_chain(llm, prompt, retriever):
|
|
| 340 |
most_relevant_doc_indices = similarity_scores.argsort(descending=True).squeeze().tolist()
|
| 341 |
|
| 342 |
#Erstelle eine Liste der relevantesten Dokumente
|
| 343 |
-
most_relevant_docs = [
|
| 344 |
|
| 345 |
#Kombiniere die Inhalte aller relevanten Dokumente
|
| 346 |
combined_content = " ".join([doc["content"] for doc in most_relevant_docs])
|
|
@@ -370,6 +372,19 @@ def rag_chain(llm, prompt, retriever):
|
|
| 370 |
return result
|
| 371 |
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
|
| 374 |
|
| 375 |
|
|
|
|
| 320 |
relevant_docs=[]
|
| 321 |
most_relevant_docs=[]
|
| 322 |
relevant_docs = retriever.get_relevant_documents(prompt)
|
| 323 |
+
extracted_docs = extract_document_info(relevant_docs)
|
| 324 |
+
|
| 325 |
|
| 326 |
print("releant docs1......................")
|
| 327 |
+
if (len(extracted_docs)>0):
|
| 328 |
print("releant docs2......................")
|
| 329 |
+
print(extracted_docs)
|
| 330 |
#llm_chain = LLMChain(llm = llm, prompt = RAG_CHAIN_PROMPT)
|
| 331 |
#result = llm_chain.run({"context": relevant_docs, "question": prompt})
|
| 332 |
# Erstelle ein PromptTemplate mit Platzhaltern für Kontext und Frage
|
| 333 |
#RAG_CHAIN_PROMPT = PromptTemplate(template="Context: {context}\n\nQuestion: {question}\n\nAnswer:")
|
| 334 |
|
| 335 |
# Inhalte der relevanten Dokumente abrufen
|
| 336 |
+
doc_contents = [doc["content"] for doc in extracted_docs]
|
| 337 |
|
| 338 |
#Berechne die Ähnlichkeiten und finde das relevanteste Dokument
|
| 339 |
question_embedding = embedder_modell.encode(prompt, convert_to_tensor=True)
|
|
|
|
| 342 |
most_relevant_doc_indices = similarity_scores.argsort(descending=True).squeeze().tolist()
|
| 343 |
|
| 344 |
#Erstelle eine Liste der relevantesten Dokumente
|
| 345 |
+
most_relevant_docs = [extracted_docs[i] for i in most_relevant_doc_indices]
|
| 346 |
|
| 347 |
#Kombiniere die Inhalte aller relevanten Dokumente
|
| 348 |
combined_content = " ".join([doc["content"] for doc in most_relevant_docs])
|
|
|
|
| 372 |
return result
|
| 373 |
|
| 374 |
|
| 375 |
+
# Helper that converts retriever documents into a list of plain dictionaries.
def extract_document_info(documents):
    """Extract content and provenance info from retriever documents.

    Args:
        documents: Iterable of document objects exposing ``page_content``
            and a ``metadata`` dict (e.g. LangChain ``Document`` instances).

    Returns:
        list[dict]: One dict per document with keys:
            ``content`` - the document text (``page_content``),
            ``page``    - page number from metadata, or ``None`` if absent,
            ``path``    - source path from metadata, or ``None`` if absent.
    """
    extracted_info = []
    for doc in documents:
        metadata = getattr(doc, "metadata", {}) or {}
        extracted_info.append({
            "content": doc.page_content,
            # .get() instead of [] so documents without 'page'/'source'
            # metadata (non-PDF loaders, web pages, ...) don't raise KeyError.
            "page": metadata.get("page"),
            "path": metadata.get("source"),
        })
    return extracted_info
|
| 386 |
+
|
| 387 |
+
|
| 388 |
|
| 389 |
|
| 390 |
|