Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,10 @@ from langchain.chains import RetrievalQA
|
|
| 7 |
from langchain.llms.base import LLM
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
class
|
| 12 |
-
def __init__(self, max_new_tokens=
|
| 13 |
-
self.pipeline = pipeline("text-generation", model="
|
| 14 |
self.max_new_tokens = max_new_tokens
|
| 15 |
self.temperature = temperature
|
| 16 |
|
|
@@ -20,13 +20,13 @@ class LeoLM(LLM):
|
|
| 20 |
|
| 21 |
@property
|
| 22 |
def _identifying_params(self):
|
| 23 |
-
return {"model": "
|
| 24 |
|
| 25 |
@property
|
| 26 |
def _llm_type(self):
|
| 27 |
-
return "
|
| 28 |
|
| 29 |
-
#
|
| 30 |
loader = PyPDFLoader("TrendingMedia_ChatbotBasis_FINAL.pdf")
|
| 31 |
documents = loader.load()
|
| 32 |
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
|
@@ -35,8 +35,8 @@ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-
|
|
| 35 |
db = FAISS.from_documents(texts, embeddings)
|
| 36 |
retriever = db.as_retriever(search_kwargs={"k": 2})
|
| 37 |
|
| 38 |
-
# Verwende den neuen
|
| 39 |
-
llm =
|
| 40 |
|
| 41 |
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
|
| 42 |
|
|
|
|
| 7 |
from langchain.llms.base import LLM
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
+
# Wrapper-Klasse für das deutsche GPT-2 Modell
|
| 11 |
+
class GermanGPT2(LLM):
|
| 12 |
+
def __init__(self, max_new_tokens=128, temperature=0.7):
    """Create the wrapper and eagerly load the German GPT-2 generation pipeline.

    Args:
        max_new_tokens (int): cap on tokens generated per call (default 128).
        temperature (float): sampling temperature for generation (default 0.7).
    """
    # NOTE(review): LangChain's LLM base is pydantic-backed in many versions;
    # overriding __init__ without calling super().__init__() and assigning
    # undeclared attributes (pipeline/max_new_tokens/temperature) may raise at
    # runtime — confirm against the installed langchain version.
    # Downloads/loads the model on construction, so instantiation is slow on first run.
    self.pipeline = pipeline("text-generation", model="dbmdz/german-gpt2")
    self.max_new_tokens = max_new_tokens
    self.temperature = temperature
|
| 16 |
|
|
|
|
| 20 |
|
| 21 |
@property
def _identifying_params(self):
    """Parameters that uniquely identify this LLM configuration to LangChain."""
    model_info = dict(model="dbmdz/german-gpt2")
    return model_info
|
| 24 |
|
| 25 |
@property
def _llm_type(self):
    """Type tag LangChain uses to label this custom LLM wrapper."""
    type_tag = "custom_german_gpt2"
    return type_tag
|
| 28 |
|
| 29 |
+
# Load and process the knowledge-base PDF once at module import (startup).
# NOTE(review): the PDF path is hard-coded and relative — assumes the file
# sits next to app.py in the deployed Space; confirm on deployment.
loader = PyPDFLoader("TrendingMedia_ChatbotBasis_FINAL.pdf")
documents = loader.load()
# Character-based chunking: 1000-char chunks with 100-char overlap; the
# resulting `texts` split happens on a line outside this view.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
|
|
|
| 35 |
# Build the in-memory FAISS index from the split chunks (`texts` and
# `embeddings` are defined above, outside this view).
db = FAISS.from_documents(texts, embeddings)
# Retrieve the top-2 most similar chunks per query.
retriever = db.as_retriever(search_kwargs={"k": 2})

# Use the custom GermanGPT2 wrapper as the chain's LLM.
llm = GermanGPT2(max_new_tokens=128, temperature=0.7)

# "stuff" chain type: all retrieved chunks are stuffed into a single prompt.
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
|
| 42 |
|