"""Gradio chatbot answering questions about the M&L website.

Loads scraped .txt pages, normalizes and chunks them, embeds the chunks
into a Chroma vector store, and serves a ConversationalRetrievalChain
behind a Gradio Blocks UI.
"""

import os
import re

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma, Pinecone
#from langchain.chat_models import ChatOpenAI

# Load every scraped .txt page from the website mirror directory.
txt_loader = DirectoryLoader('www.mlconsultants.net/', glob="**/*.txt")
documents = txt_loader.load()


def remove_newlines(text):
    """Normalize whitespace in *text* for cleaner embeddings.

    Replaces real newlines and literal ``\\n`` escape sequences with
    spaces, then collapses every run of two or more spaces to a single
    space. (The previous pair of chained ``replace('  ', ' ')`` calls
    only halved space runs twice and left residual double spaces for
    longer runs — the regex handles runs of any length.)
    """
    text = text.replace('\n', ' ')
    text = text.replace('\\n', ' ')
    return re.sub(r' {2,}', ' ', text)


# Apply whitespace preprocessing to each loaded document in place.
for doc in documents:
    doc.page_content = remove_newlines(doc.page_content)

# Split into ~1000-character chunks; a small overlap keeps context that
# would otherwise be lost at chunk boundaries.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator=" ")
documents = text_splitter.split_documents(documents)
print(len(documents))

# Embed all chunks (OpenAI bills per embedded token) and index in Chroma.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

# Similarity search returning the top 5 chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# NOTE(review): "text-davinci-003" is a deprecated OpenAI completion
# model — consider migrating to a current chat model (e.g. ChatOpenAI).
qa = ConversationalRetrievalChain.from_llm(OpenAI(model_name="text-davinci-003"), retriever)

chat_history = []

with gr.Blocks() as demo:
    gr.Markdown(
        """
        # M&L Projekt: Human Search - Wissensdatenbanken in natürlicher Sprache anfragen und erhalten!
        Ich bin eine generative KI und mir steht das Wissen der M&L Webseite bis Mai 2023 zur Verfügung. Frage mich etwas! Z.b. Was ist die Targetmatrix? Wen oder was sponsort die M&L? 
        Wann wurde die M&L gegründet? Uvm. Bitte beachte: Ich bin ein erster Protoyp und mein Können wird noch ausgebaut! """
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(user_message, chat_history):
        """Handle one chat turn.

        Gradio delivers the history as lists of [user, bot] pairs while
        the QA chain expects tuples, so the history is converted before
        the chain call. Returns an empty string (clearing the textbox)
        and the updated history.
        """
        print(user_message)
        if chat_history:
            # Convert Gradio's list pairs to the tuples the chain expects.
            chat_history = [tuple(pair) for pair in chat_history]
            print(chat_history)
        # Query the retrieval chain with the question and prior turns.
        response = qa({"question": user_message, "chat_history": chat_history})
        # Append the new (question, answer) turn to the history.
        chat_history.append((user_message, response["answer"]))
        print(chat_history)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)