# ml-test / app.py — Hugging Face Space by vogelweide85
# (last commit 284588a: "deleted key and created new one")
import os
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone, Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
#from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.document_loaders import DirectoryLoader
#from langchain.document_loaders import TextLoader
#print(os.getcwd())
#print(os.listdir())
# Recursively pick up every *.txt file from the scraped M&L website dump.
documents = DirectoryLoader('www.mlconsultants.net/', glob="**/*.txt").load()
# Is there a better preprocessing approach? (original author's open question)
def remove_newlines(serie):
    """Normalize whitespace in a raw document string.

    Replaces real newlines and the literal two-character sequence ``\\n``
    with spaces, then collapses every run of consecutive spaces to one.

    Args:
        serie: Raw page text (a plain ``str``).

    Returns:
        The cleaned string with single-space separation.
    """
    serie = serie.replace('\n', ' ')
    serie = serie.replace('\\n', ' ')
    # The original two fixed replace('  ', ' ') passes only collapsed runs
    # of up to four spaces (eight spaces still left a double space behind);
    # loop until no double space remains so runs of any length are handled.
    while '  ' in serie:
        serie = serie.replace('  ', ' ')
    return serie
# Apply the whitespace normalization to every loaded document in place.
for doc in documents:
    doc.page_content = remove_newlines(doc.page_content)
# Split the documents into ~1000-character chunks (20-character overlap) so
# each chunk fits comfortably in the embedding / LLM context window.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator=" ")
documents = text_splitter.split_documents(documents)
print(len(documents))  # debug: number of chunks after splitting

# Embed every chunk with OpenAI embeddings and index them in an in-memory
# Chroma vector store.  NOTE: every app start re-embeds the whole corpus,
# which costs OpenAI API credits (the original author flagged this too).
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

# Similarity search returning the top-5 chunks per query
# (the old comment claimed "top6", but k is 5).
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Conversational RAG chain backed by the legacy text-davinci-003 completion model.
qa = ConversationalRetrievalChain.from_llm(OpenAI(model_name="text-davinci-003"), retriever)

# Running conversation memory: list of (question, answer) tuples.
chat_history = []
import gradio as gr

with gr.Blocks() as demo:
    # Static intro text rendered above the chat widget (German, user-facing
    # copy — left byte-for-byte as written).
    gr.Markdown(
"""
# M&L Projekt: Human Search - Wissensdatenbanken in natürlicher Sprache anfragen und erhalten!
Ich bin eine generative KI und mir steht das Wissen der M&L Webseite bis Mai 2023 zur Verfügung.
Frage mich etwas! Z.b. Was ist die Targetmatrix? Wen oder was sponsort die M&L? Wann wurde die M&L gegründet? Uvm.
Bitte beachte: Ich bin ein erster Protoyp und mein Können wird noch ausgebaut!
"""
    )
    chatbot = gr.Chatbot()      # conversation display component
    msg = gr.Textbox()          # user input box
    clear = gr.Button("Clear")  # resets the conversation display

    def respond(user_message, chat_history):
        """Handle one chat turn: query the QA chain and update the history.

        Args:
            user_message: Text the user submitted via the textbox.
            chat_history: Chatbot state from Gradio — pairs of
                (user, bot) messages; arrives as lists after a round
                trip through the UI.

        Returns:
            A ("", updated_history) pair: empties the textbox and
            refreshes the chatbot display.
        """
        print(user_message)  # debug: incoming question
        #print(chat_history)
        if chat_history:
            # Gradio hands the history back as lists; the LangChain chain
            # expects (question, answer) tuples.
            chat_history = [tuple(sublist) for sublist in chat_history]
            print(chat_history)
        # Get response from QA chain (closes over the module-level `qa`).
        response = qa({"question": user_message, "chat_history": chat_history})
        # Append user message and response to chat history
        chat_history.append((user_message, response["answer"]))
        print(chat_history)  # debug: full history after this turn
        return "", chat_history

    # Enter in the textbox triggers respond: clears the textbox and
    # refreshes the chatbot with the updated history.
    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    # "Clear" wipes the chatbot component (history resets to empty).
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)