Update app.py
Browse files
app.py
CHANGED
|
@@ -329,6 +329,28 @@ def document_retrieval_chroma(llm, prompt):
|
|
| 329 |
#ChromaDb um die embedings zu speichern
|
| 330 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 331 |
return db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
###########################################
|
| 333 |
# Prepare the MongoDB database so that documents can be stored in it as vectors
|
| 334 |
def document_retrieval_mongodb(llm, prompt):
|
|
@@ -368,7 +390,6 @@ def rag_chain2(prompt, db, k=3):
|
|
| 368 |
neu_prompt = rag_template
|
| 369 |
for i, chunk in enumerate(retrieved_chunks):
|
| 370 |
neu_prompt += f"{i+1}. {chunk}\n"
|
| 371 |
-
print("neu_prompt:.................")
|
| 372 |
print(neu_prompt)
|
| 373 |
return neu_prompt
|
| 374 |
|
|
@@ -492,14 +513,12 @@ def generate_text_zu_bild(file, prompt, k, rag_option):
|
|
| 492 |
document_storage_chroma(splits)
|
| 493 |
db = document_retrieval_chroma()
|
| 494 |
#mit RAG:
|
| 495 |
-
print("hier!!!!!!!!!!!!!!!!!!!!")
|
| 496 |
neu_text_mit_chunks = rag_chain2(prompt, db, k)
|
| 497 |
#für Chat LLM:
|
| 498 |
#prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
|
| 499 |
#als reiner prompt:
|
| 500 |
prompt_neu = generate_prompt_with_history(neu_text_mit_chunks, history)
|
| 501 |
-
|
| 502 |
-
print(prompt_neu)
|
| 503 |
headers, payload = process_image(file, prompt_neu)
|
| 504 |
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
|
| 505 |
#als json ausgeben
|
|
|
|
| 329 |
#ChromaDb um die embedings zu speichern
|
| 330 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 331 |
return db
|
| 332 |
+
|
| 333 |
+
############################################
|
| 334 |
+
# Prepare the Chroma DB so that documents can be stored in it as vectors
|
| 335 |
+
# Second variant, matching rag_chain2 for generate_text_mit_bild - no LLM has to be chosen beforehand
|
| 336 |
+
def document_retrieval_chroma2():
    """Build a Chroma vector store handle backed by OpenAI embeddings.

    Takes no arguments; neither an LLM nor a prompt is involved.

    Returns:
        The ``Chroma`` instance created with an ``OpenAIEmbeddings`` embedding
        function and ``PATH_WORK + CHROMA_DIR`` as its persist directory.
    """
    # Active choice: OpenAI embeddings.
    #
    # Alternatives that are currently disabled:
    #   - HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
    #     model_kwargs={"device": "cpu"})
    #     (noted by the author as very compute-intensive)
    #   - HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2",
    #     model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
    #     (noted as somewhat less compute-intensive)
    #   - SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    #     (noted as the "without Langchain" option)
    embedding_fn = OpenAIEmbeddings()

    # Chroma DB used to store the embeddings.
    store_dir = PATH_WORK + CHROMA_DIR
    vector_store = Chroma(
        embedding_function=embedding_fn,
        persist_directory=store_dir,
    )
    print ("Chroma DB bereit ...................")

    return vector_store
|
| 353 |
+
|
| 354 |
###########################################
|
| 355 |
# Prepare the MongoDB database so that documents can be stored in it as vectors
|
| 356 |
def document_retrieval_mongodb(llm, prompt):
|
|
|
|
| 390 |
neu_prompt = rag_template
|
| 391 |
for i, chunk in enumerate(retrieved_chunks):
|
| 392 |
neu_prompt += f"{i+1}. {chunk}\n"
|
|
|
|
| 393 |
print(neu_prompt)
|
| 394 |
return neu_prompt
|
| 395 |
|
|
|
|
| 513 |
document_storage_chroma(splits)
|
| 514 |
db = document_retrieval_chroma()
|
| 515 |
#mit RAG:
|
|
|
|
| 516 |
neu_text_mit_chunks = rag_chain2(prompt, db, k)
|
| 517 |
#für Chat LLM:
|
| 518 |
#prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
|
| 519 |
#als reiner prompt:
|
| 520 |
prompt_neu = generate_prompt_with_history(neu_text_mit_chunks, history)
|
| 521 |
+
|
|
|
|
| 522 |
headers, payload = process_image(file, prompt_neu)
|
| 523 |
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
|
| 524 |
#als json ausgeben
|