Spaces:
Sleeping
Sleeping
Update run.py
Browse files
run.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
| 3 |
# Author: Andreas Fischer
|
| 4 |
# Date: October 10th, 2024
|
| 5 |
-
# Last update: October
|
| 6 |
##########################################################################################
|
| 7 |
|
| 8 |
import os
|
|
@@ -108,23 +108,26 @@ def split_with_overlap(text,chunk_size=3500, overlap=700):
|
|
| 108 |
chunks.append(text[i:end])
|
| 109 |
return chunks
|
| 110 |
|
| 111 |
-
|
|
|
|
| 112 |
print("def add_doc!")
|
| 113 |
print(path)
|
| 114 |
anhang=False
|
| 115 |
if(str.lower(path).endswith(".pdf") and os.path.exists(path)):
|
| 116 |
doc=convertPDF(path)
|
| 117 |
if(len(doc[0])>5):
|
| 118 |
-
gr.Info("PDF uploaded, start Indexing excerpt (first 5 pages)!")
|
| 119 |
else:
|
| 120 |
-
gr.Info("PDF uploaded, start Indexing!")
|
| 121 |
doc="\n\n".join(doc[0][0:5])
|
| 122 |
anhang=True
|
| 123 |
-
|
|
|
|
| 124 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
| 125 |
print(str(client.list_collections()))
|
| 126 |
#global collection
|
| 127 |
-
|
|
|
|
| 128 |
if(not "name="+dbName in str(client.list_collections())):
|
| 129 |
# client.delete_collection(name=dbName)
|
| 130 |
collection = client.create_collection(
|
|
@@ -157,17 +160,28 @@ def add_doc(path):
|
|
| 157 |
return(collection)
|
| 158 |
|
| 159 |
#split_with_overlap("test me if you can",2,1)
|
|
|
|
|
|
|
| 160 |
|
| 161 |
import gradio as gr
|
| 162 |
import re
|
| 163 |
-
def multimodalResponse(message,history,dropdown):
|
| 164 |
print("def multimodal response!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
length=str(len(history))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
query=message["text"]
|
| 167 |
if(len(message["files"])>0): # is there at least one file attached?
|
| 168 |
-
collection=add_doc(message["files"][0])
|
| 169 |
-
else:
|
| 170 |
-
collection=add_doc(message["text"])
|
| 171 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
| 172 |
print(str(client.list_collections()))
|
| 173 |
x=collection.get(include=[])["ids"]
|
|
@@ -214,11 +228,3 @@ i.launch() #allowed_paths=["."])
|
|
| 214 |
|
| 215 |
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
| 2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
| 3 |
# Author: Andreas Fischer
|
| 4 |
# Date: October 10th, 2024
|
| 5 |
+
# Last update: October 12th, 2024
|
| 6 |
##########################################################################################
|
| 7 |
|
| 8 |
import os
|
|
|
|
| 108 |
chunks.append(text[i:end])
|
| 109 |
return chunks
|
| 110 |
|
| 111 |
+
|
| 112 |
+
def add_doc(path, session):
|
| 113 |
print("def add_doc!")
|
| 114 |
print(path)
|
| 115 |
anhang=False
|
| 116 |
if(str.lower(path).endswith(".pdf") and os.path.exists(path)):
|
| 117 |
doc=convertPDF(path)
|
| 118 |
if(len(doc[0])>5):
|
| 119 |
+
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (first 5 pages)!")
|
| 120 |
else:
|
| 121 |
+
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
|
| 122 |
doc="\n\n".join(doc[0][0:5])
|
| 123 |
anhang=True
|
| 124 |
+
else:
|
| 125 |
+
gr.Info("No PDF attached - answer based on DB_"+str(session)+".")
|
| 126 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
| 127 |
print(str(client.list_collections()))
|
| 128 |
#global collection
|
| 129 |
+
print(str(session))
|
| 130 |
+
dbName="DB_"+str(session)
|
| 131 |
if(not "name="+dbName in str(client.list_collections())):
|
| 132 |
# client.delete_collection(name=dbName)
|
| 133 |
collection = client.create_collection(
|
|
|
|
| 160 |
return(collection)
|
| 161 |
|
| 162 |
#split_with_overlap("test me if you can",2,1)
|
| 163 |
+
from datetime import date
|
| 164 |
+
databases=[(date.today(),"0")] # list of all databases
|
| 165 |
|
| 166 |
import gradio as gr
|
| 167 |
import re
|
| 168 |
+
def multimodalResponse(message,history,dropdown, request: gr.Request):
|
| 169 |
print("def multimodal response!")
|
| 170 |
+
global databases
|
| 171 |
+
if request:
|
| 172 |
+
session=request.session_hash
|
| 173 |
+
else:
|
| 174 |
+
session="0"
|
| 175 |
length=str(len(history))
|
| 176 |
+
print(databases)
|
| 177 |
+
if(not databases[-1][1]==session):
|
| 178 |
+
databases.append((date.today(),session))
|
| 179 |
+
#print(databases)
|
| 180 |
query=message["text"]
|
| 181 |
if(len(message["files"])>0): # is there at least one file attached?
|
| 182 |
+
collection=add_doc(message["files"][0], session)
|
| 183 |
+
else: # otherwise, you still want to get the collection with the session-based db
|
| 184 |
+
collection=add_doc(message["text"], session)
|
| 185 |
client = chromadb.PersistentClient(path="output/general_knowledge")
|
| 186 |
print(str(client.list_collections()))
|
| 187 |
x=collection.get(include=[])["ids"]
|
|
|
|
| 228 |
|
| 229 |
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|