Update run.py
run.py CHANGED
@@ -38,23 +38,47 @@ print(client.list_collections())
 jina_ef=JinaEmbeddingFunction()
 embeddingModel=jina_ef
 
-
-
-import json
-inferenceClient = InferenceClient(
-    "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    #"mistralai/Mistral-7B-Instruct-v0.1"
-    )
-def format_prompt(message, history):
+#mod="mistralai/Mixtral-8x7b-instruct-v0.1"
+#tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
+#cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
+cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
+#res=tok.apply_chat_template(cha)
+#print(tok.decode(res))
+
+def format_prompt0(message, history):
   prompt = "<s>"
   #for user_prompt, bot_response in history:
   #  prompt += f"[INST] {user_prompt} [/INST]"
   #  prompt += f" {bot_response}</s> "
   prompt += f"[INST] {message} [/INST]"
   return prompt
 
 
+def format_prompt(message, history, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=False):
+  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
+  startOfString="<s>" #<s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST] A2</s>
+  template0=" [INST]{system}\n[/INST]</s>"
+  template1=" [INST] {message} [/INST]"
+  template2=" {response}</s>"
+  prompt = ""
+  if RAGAddon is not None:
+    system += RAGAddon
+  if system is not None:
+    prompt += template0.format(system=system) #"<s>"
+  if history is not None:
+    for user_message, bot_response in history[-historylimit:]:
+      if user_message is None: user_message = ""
+      if bot_response is None: bot_response = ""
+      #bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG components
+      if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML components in general (may cause bugs with markdown rendering)
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit])
+  if system2 is not None:
+    prompt += system2
+  return startOfString+prompt
+
 
 from pypdf import PdfReader
 import ocrmypdf
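Note: the new format_prompt builds a Mixtral-style [INST] prompt from an optional system text, the chat history and the current message. A minimal sketch of what it returns (the "S"/"U1"/"A1"/"U2" strings are illustrative placeholders, not values from the Space):

  history = [("U1", "A1")]
  print(format_prompt("U2", history, system="S"))
  # "<s> [INST]S\n[/INST]</s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST]"
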
@@ -159,14 +183,20 @@ def add_doc(path, session):
   print(now-then) # too many GB for sentences (GPU); 0:00:10.375087 for chunks
   return(collection)
 
+
 #split_with_overlap("test me if you can",2,1)
 from datetime import date
 databases=[(date.today(),"0")] # list of all databases
 
+from huggingface_hub import InferenceClient
 import gradio as gr
 import re
-def multimodalResponse(message,history,dropdown, request: gr.Request):
+def multimodalResponse(message, history, dropdown, hfToken, request: gr.Request):
   print("def multimodal response!")
+  if(hfToken.startswith("hf_")): # use HF hub with custom token if a token is provided
+    inferenceClient = InferenceClient(model=myModel, token=hfToken)
+  else:
+    inferenceClient = InferenceClient(myModel)
   global databases
   if request:
     session=request.session_hash
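Note: the new token handling assumes a module-level myModel string that is defined elsewhere in run.py and not visible in this diff. A standalone sketch of the same fallback, with a placeholder model id:

  from huggingface_hub import InferenceClient

  myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # placeholder; run.py defines myModel elsewhere

  def get_client(hfToken):
      # use the user-supplied token if it looks like a Hugging Face token, otherwise call anonymously
      if hfToken.startswith("hf_"):
          return InferenceClient(model=myModel, token=hfToken)
      return InferenceClient(myModel)
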
@@ -186,10 +216,7 @@ def multimodalResponse(message,history,dropdown, request: gr.Request):
   print(str(client.list_collections()))
   x=collection.get(include=[])["ids"]
   context=collection.query(query_texts=[query], n_results=1)
-
-  #context=["<context "+str(i+1)+">\n"+c+"\n</context "+str(i+1)+">" for i, c in enumerate(retrievedTexts)]
-  #context="\n\n".join(context)
-  #return context
+  gr.Info("Kontext:\n"+str(context))
   generate_kwargs = dict(
     temperature=float(0.9),
     max_new_tokens=5000,
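Note: collection.query returns a dict of parallel lists with one inner list per query text; with n_results=1 as in this hunk, the retrieved chunk can be read out as sketched below (assuming the default include fields of a Chroma collection):

  context = collection.query(query_texts=[query], n_results=1)
  top_chunk = context["documents"][0][0]     # text of the best-matching chunk
  top_id = context["ids"][0][0]              # its id in the collection
  top_distance = context["distances"][0][0]  # embedding distance to the query
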
@@ -206,13 +233,15 @@ def multimodalResponse(message,history,dropdown, request: gr.Request):
   #"Return only your response to the question given the above information "+\
   #"following the users instructions as needed.\n\nContext:"+\
   print(system)
-  formatted_prompt = format_prompt(system+"\n"+query, history)
+  #formatted_prompt = format_prompt0(system+"\n"+query, history)
+  formatted_prompt = format_prompt(query, history, system=system)
+  print(formatted_prompt)
   stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
   output = ""
   for response in stream:
     output += response.token.text
     yield output
-  #output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br
+  #output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br>"+str(context)+"</details>"
   yield output
 
 i=gr.ChatInterface(multimodalResponse,
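Note: with stream=True and details=True, InferenceClient.text_generation yields per-token detail objects, which is why the loop reads response.token.text; return_full_text=False keeps the prompt itself out of the streamed output.
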
@@ -223,8 +252,10 @@ i=gr.ChatInterface(multimodalResponse,
       info="select retrieval version",
       choices=["1","2","3"],
       value=["1"],
-      label="Retrieval Version")
+      label="Retrieval Version"),
+    gr.Textbox(
+      value="",
+      label="HF_token"),
+  ])
 i.launch() #allowed_paths=["."])
 
-
-
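Note: the hunk shows only the tail of the gr.ChatInterface call; a sketch of how the two additional inputs line up with the extra dropdown and hfToken parameters of multimodalResponse (the additional_inputs keyword is the standard Gradio mechanism for this; only the widget arguments visible in the diff are verbatim):

  i = gr.ChatInterface(multimodalResponse,
        additional_inputs=[
          gr.Dropdown(
            info="select retrieval version",
            choices=["1","2","3"],
            value=["1"],
            label="Retrieval Version"),
          gr.Textbox(
            value="",
            label="HF_token"),
        ])
  i.launch()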