Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -78,7 +78,7 @@ print ("Inf.Client")
|
|
| 78 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
| 79 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
| 80 |
#Inference mit Authorisation:
|
| 81 |
-
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
|
| 82 |
HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
|
| 83 |
|
| 84 |
##############################################
|
|
@@ -196,14 +196,14 @@ def document_retrieval_chroma():
|
|
| 196 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 197 |
#etwas weniger rechenaufwendig:
|
| 198 |
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
|
|
|
|
|
|
| 199 |
|
| 200 |
#ChromaDb um die embeddings zu speichern
|
| 201 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 202 |
print ("Chroma DB bereit ...................")
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
return db #, retriever
|
| 207 |
|
| 208 |
|
| 209 |
|
|
@@ -218,10 +218,9 @@ def llm_chain(prompt):
|
|
| 218 |
|
| 219 |
#langchain nutzen, um prompt an llm zu leiten, aber vorher in der VektorDB suchen, um passende splits zum Prompt hinzuzufügen
|
| 220 |
#prompt mit RAG!!!
|
| 221 |
-
def rag_chain(prompt, db):
|
| 222 |
rag_template = "Nutze die folgenden Kontext Teile am Ende, um die Frage zu beantworten . " + template + "Frage: " + prompt + "Kontext Teile: "
|
| 223 |
-
retrieved_chunks = db.similarity_search(prompt)
|
| 224 |
-
#retrieved_chunks = retriever.get_relevant_documents(prompt)
|
| 225 |
|
| 226 |
neu_prompt = rag_template
|
| 227 |
for i, chunk in enumerate(retrieved_chunks):
|
|
@@ -269,7 +268,7 @@ def generate_prompt_with_history_openai(prompt, history):
|
|
| 269 |
##############################################
|
| 270 |
# generate function
|
| 271 |
##############################################
|
| 272 |
-
def generate(text, history, rag_option, model_option, temperature=0.5, max_new_tokens=4048, top_p=0.6, repetition_penalty=1.3):
|
| 273 |
#mit RAG
|
| 274 |
if (rag_option is None):
|
| 275 |
raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
|
|
@@ -277,6 +276,14 @@ def generate(text, history, rag_option, model_option, temperature=0.5, max_new_
|
|
| 277 |
raise gr.Error("Prompt ist erforderlich.")
|
| 278 |
|
| 279 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
if (rag_option == "An"):
|
| 281 |
#muss nur einmal ausgeführt werden...
|
| 282 |
if not splittet:
|
|
@@ -284,7 +291,7 @@ def generate(text, history, rag_option, model_option, temperature=0.5, max_new_
|
|
| 284 |
document_storage_chroma(splits)
|
| 285 |
db = document_retrieval_chroma()
|
| 286 |
#mit RAG:
|
| 287 |
-
neu_text_mit_chunks = rag_chain(text, db)
|
| 288 |
#für Chat LLM:
|
| 289 |
#prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
|
| 290 |
#als reiner prompt:
|
|
@@ -359,7 +366,8 @@ examples=[['Was ist ChtGPT-4?'],['schreibe ein Python Programm, dass die GPT-4 A
|
|
| 359 |
|
| 360 |
additional_inputs = [
|
| 361 |
gr.Radio(["Aus", "An"], label="RAG - LI Erweiterungen", value = "Aus"),
|
| 362 |
-
gr.Radio(["
|
|
|
|
| 363 |
gr.Slider(label="Temperature", value=0.65, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Höhere Werte erzeugen diversere Antworten", visible=True),
|
| 364 |
gr.Slider(label="Max new tokens", value=1024, minimum=0, maximum=4096, step=64, interactive=True, info="Maximale Anzahl neuer Tokens", visible=True),
|
| 365 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.6, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Höhere Werte verwenden auch Tokens mit niedrigerer Wahrscheinlichkeit.", visible=True),
|
|
|
|
| 78 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
| 79 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
| 80 |
#Inference mit Authorisation:
|
| 81 |
+
#API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
|
| 82 |
HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
|
| 83 |
|
| 84 |
##############################################
|
|
|
|
| 196 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 197 |
#etwas weniger rechenaufwendig:
|
| 198 |
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 199 |
+
#oder einfach ohne Langchain:
|
| 200 |
+
#embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 201 |
|
| 202 |
#ChromaDb um die embeddings zu speichern
|
| 203 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 204 |
print ("Chroma DB bereit ...................")
|
| 205 |
+
|
| 206 |
+
return db
|
|
|
|
|
|
|
| 207 |
|
| 208 |
|
| 209 |
|
|
|
|
| 218 |
|
| 219 |
#langchain nutzen, um prompt an llm zu leiten, aber vorher in der VektorDB suchen, um passende splits zum Prompt hinzuzufügen
|
| 220 |
#prompt mit RAG!!!
|
| 221 |
+
def rag_chain(prompt, db, k=3):
|
| 222 |
rag_template = "Nutze die folgenden Kontext Teile am Ende, um die Frage zu beantworten . " + template + "Frage: " + prompt + "Kontext Teile: "
|
| 223 |
+
retrieved_chunks = db.similarity_search(prompt, k)
|
|
|
|
| 224 |
|
| 225 |
neu_prompt = rag_template
|
| 226 |
for i, chunk in enumerate(retrieved_chunks):
|
|
|
|
| 268 |
##############################################
|
| 269 |
# generate function
|
| 270 |
##############################################
|
| 271 |
+
def generate(text, history, rag_option, model_option, k=3, temperature=0.5, max_new_tokens=4048, top_p=0.6, repetition_penalty=1.3,):
|
| 272 |
#mit RAG
|
| 273 |
if (rag_option is None):
|
| 274 |
raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
|
|
|
|
| 276 |
raise gr.Error("Prompt ist erforderlich.")
|
| 277 |
|
| 278 |
try:
|
| 279 |
+
if (model_option == "HuggingFace1"):  # must match the Radio choice "HuggingFace1" exactly (case-sensitive)
|
| 280 |
+
#Anfrage an InferenceEndpoint1 ----------------------------
|
| 281 |
+
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
|
| 282 |
+
print("HF1")
|
| 283 |
+
else:
|
| 284 |
+
API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
|
| 285 |
+
print("HF2")
|
| 286 |
+
|
| 287 |
if (rag_option == "An"):
|
| 288 |
#muss nur einmal ausgeführt werden...
|
| 289 |
if not splittet:
|
|
|
|
| 291 |
document_storage_chroma(splits)
|
| 292 |
db = document_retrieval_chroma()
|
| 293 |
#mit RAG:
|
| 294 |
+
neu_text_mit_chunks = rag_chain(text, db, k)
|
| 295 |
#für Chat LLM:
|
| 296 |
#prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
|
| 297 |
#als reiner prompt:
|
|
|
|
| 366 |
|
| 367 |
additional_inputs = [
|
| 368 |
gr.Radio(["Aus", "An"], label="RAG - LI Erweiterungen", value = "Aus"),
|
| 369 |
+
gr.Radio(["HuggingFace1", "HuggingFace2"], label="Modellauswahl", value = "HuggingFace1"),
|
| 370 |
+
gr.Slider(label="Anzahl Vergleichsdokumente", value=3, minimum=1, maximum=10, step=1, interactive=True, info="wie viele Dokumententeile aus dem Vektorstore an den prompt gehängt werden", visible=True),
|
| 371 |
gr.Slider(label="Temperature", value=0.65, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Höhere Werte erzeugen diversere Antworten", visible=True),
|
| 372 |
gr.Slider(label="Max new tokens", value=1024, minimum=0, maximum=4096, step=64, interactive=True, info="Maximale Anzahl neuer Tokens", visible=True),
|
| 373 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.6, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Höhere Werte verwenden auch Tokens mit niedrigerer Wahrscheinlichkeit.", visible=True),
|