Update utils.py
Browse files
utils.py
CHANGED
|
@@ -15,9 +15,10 @@ import gc
|
|
| 15 |
from pygments.lexers import guess_lexer, ClassNotFound
|
| 16 |
import time
|
| 17 |
import json
|
| 18 |
-
import
|
| 19 |
-
from
|
| 20 |
-
import
|
|
|
|
| 21 |
|
| 22 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
|
| 23 |
from sentence_transformers import SentenceTransformer, util
|
|
@@ -28,9 +29,9 @@ from pypinyin import lazy_pinyin
|
|
| 28 |
import tiktoken
|
| 29 |
import mdtex2html
|
| 30 |
from markdown import markdown
|
| 31 |
-
from pygments import highlight
|
| 32 |
-
from pygments.lexers import guess_lexer,get_lexer_by_name
|
| 33 |
-
from pygments.formatters import HtmlFormatter
|
| 34 |
|
| 35 |
from langchain.chains import LLMChain, RetrievalQA
|
| 36 |
from langchain.prompts import PromptTemplate
|
|
@@ -389,7 +390,7 @@ def llm_chain2(prompt, context):
|
|
| 389 |
inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
|
| 390 |
|
| 391 |
#Generiere die Antwort
|
| 392 |
-
outputs = modell_rag.generate(inputs['input_ids'],
|
| 393 |
answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
|
| 394 |
|
| 395 |
return answer
|
|
@@ -538,6 +539,49 @@ def transfer_input(inputs):
|
|
| 538 |
)
|
| 539 |
|
| 540 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
#################################################
|
| 542 |
#Klasse mit Zuständen - z.B. für Interrupt, wenn Stop gedrückt...
|
| 543 |
#################################################
|
|
|
|
| 15 |
from pygments.lexers import guess_lexer, ClassNotFound
|
| 16 |
import time
|
| 17 |
import json
|
| 18 |
+
import base64
|
| 19 |
+
from io import BytesIO
|
| 20 |
+
import urllib.parse
|
| 21 |
+
import tempfile
|
| 22 |
|
| 23 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
|
| 24 |
from sentence_transformers import SentenceTransformer, util
|
|
|
|
| 29 |
import tiktoken
|
| 30 |
import mdtex2html
|
| 31 |
from markdown import markdown
|
| 32 |
+
#from pygments import highlight
|
| 33 |
+
#from pygments.lexers import guess_lexer,get_lexer_by_name
|
| 34 |
+
#from pygments.formatters import HtmlFormatter
|
| 35 |
|
| 36 |
from langchain.chains import LLMChain, RetrievalQA
|
| 37 |
from langchain.prompts import PromptTemplate
|
|
|
|
| 390 |
inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
|
| 391 |
|
| 392 |
#Generiere die Antwort
|
| 393 |
+
outputs = modell_rag.generate(inputs['input_ids'], max_new_tokens=1024, num_beams=2, early_stopping=True)
|
| 394 |
answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
|
| 395 |
|
| 396 |
return answer
|
|
|
|
| 539 |
)
|
| 540 |
|
| 541 |
|
| 542 |
+
########################################################
|
| 543 |
+
######## Hilfsfunktionen Datei-Upload ##################
|
| 544 |
+
# Hochladen von Dateien
|
| 545 |
+
def upload_pdf(file):
    """Upload a file to the Hugging Face repo under ``kkg_dokumente/``.

    Args:
        file: The uploaded file. Either an object exposing the local path
            as ``.name`` or the path itself (``str`` / ``os.PathLike``).
            ``None`` means nothing was uploaded.

    Returns:
        ``(None, "Keine Datei hochgeladen.")`` when ``file`` is ``None``;
        otherwise a single status string naming the uploaded file.
    """
    if file is None:
        # NOTE(review): this branch returns a 2-tuple while the success path
        # returns a plain string -- confirm which shape the caller expects.
        return None, "Keine Datei hochgeladen."

    # Accept a plain path as well as a wrapper object that carries the path
    # in `.name`, so the function works with either callback payload.
    if isinstance(file, (str, os.PathLike)):
        local_path = os.fspath(file)
    else:
        local_path = file.name

    # Only the base name is used for the target path inside the repo.
    filename = os.path.basename(local_path)
    upload_path = f"kkg_dokumente/{filename}"

    # `api`, REPO_ID, REPO_TYPE and HF_WRITE are defined elsewhere in the
    # module (presumably a huggingface_hub client and its settings).
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=upload_path,
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        token=HF_WRITE,
    )
    return f"PDF '{filename}' erfolgreich hochgeladen."
|
| 562 |
+
|
| 563 |
+
def display_files():
    """Render the entries of ``DOCS_DIR`` as an HTML table.

    Each row shows the result of ``download_link(name)`` for the entry and
    its size in KB (two decimals); row background colors alternate.

    Returns:
        The HTML table as a string. If ``DOCS_DIR`` is not an existing
        directory, the table contains only the header row.
    """
    # A missing directory yields an empty table instead of an exception,
    # matching the guard used by list_pdfs().
    names = os.listdir(DOCS_DIR) if os.path.isdir(DOCS_DIR) else []

    # Collect the fragments and join once instead of repeated `+=`.
    parts = [
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname</th><th>Größe (KB)</th></tr>",
    ]
    for index, name in enumerate(names):
        size_kb = os.path.getsize(os.path.join(DOCS_DIR, name)) / 1024  # bytes -> KB
        row_color = "#4f4f4f" if index % 2 == 0 else "#3a3a3a"  # alternating row colors
        parts.append(f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>")
        # download_link() is defined elsewhere in the module; its output is
        # embedded as-is (presumably an HTML link -- not visible here).
        parts.append(f"<td><b>{download_link(name)}</b></td>")
        parts.append(f"<td>{size_kb:.2f}</td></tr>")
    parts.append("</table>")
    return "".join(parts)
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
# gefundene relevante Dokumente auflisten (links)
|
| 579 |
+
def list_pdfs():
    """Return the names of the ``.pdf`` files found in ``SAVE_DIR``.

    Returns:
        A list of file names ending in ``.pdf``. The list is empty when
        ``DOCS_DIR`` does not exist or ``SAVE_DIR`` is not a directory.
    """
    # NOTE(review): the original guard checks DOCS_DIR while the listing
    # reads SAVE_DIR -- confirm whether both are meant to be the same
    # directory. The original guard is kept, and the directory actually
    # listed is guarded as well so a missing SAVE_DIR no longer raises.
    if not os.path.exists(DOCS_DIR) or not os.path.isdir(SAVE_DIR):
        return []
    return [name for name in os.listdir(SAVE_DIR) if name.endswith('.pdf')]
|
| 583 |
+
|
| 584 |
+
|
| 585 |
#################################################
|
| 586 |
#Klasse mit Zuständen - z.B. für Interrupt, wenn Stop gedrückt...
|
| 587 |
#################################################
|