Hugging Face Space (status: Sleeping) — Gradio chatbot that answers questions about the M&L website via a LangChain ConversationalRetrievalChain.
| import os | |
| from langchain.document_loaders import TextLoader | |
| from langchain.indexes import VectorstoreIndexCreator | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.vectorstores import Pinecone, Chroma | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.chains import ConversationalRetrievalChain | |
| #from langchain.chat_models import ChatOpenAI | |
| from langchain.llms import OpenAI | |
| from langchain.document_loaders import DirectoryLoader | |
| #from langchain.document_loaders import TextLoader | |
| #print(os.getcwd()) | |
| #print(os.listdir()) | |
# Load every .txt file (recursively, via the "**/*.txt" glob) from the
# directory holding the scraped website content.
# NOTE(review): 'www.mlconsultants.net/' is a relative path named after the
# site — confirm it exists relative to the process working directory.
txt_loader = DirectoryLoader('www.mlconsultants.net/', glob = "**/*.txt")
documents = txt_loader.load()
def remove_newlines(serie):
    """Normalise whitespace in a document's raw text.

    Real newlines and literal escaped newlines (a backslash followed by
    ``n``) are turned into spaces, then any run of consecutive spaces is
    collapsed to a single space.

    The original implementation applied ``replace('  ', ' ')`` twice,
    which only halves a run of spaces two times — e.g. 8 consecutive
    spaces were reduced to 2, not 1.  A regex collapses runs of any
    length in one pass.

    Args:
        serie: Raw page content as a string.

    Returns:
        The cleaned string with single-space-separated words.
    """
    import re  # local import so the module-level import block is untouched
    serie = serie.replace('\n', ' ')
    serie = serie.replace('\\n', ' ')
    # Collapse every run of two or more spaces into one space.
    return re.sub(r' {2,}', ' ', serie)
# Apply preprocessing: normalise the whitespace of every loaded document
# in place.  Iterate the documents directly instead of the
# `for x in range(0, len(documents))` index anti-pattern; the commented-out
# notebook debug statements have been removed.
for document in documents:
    document.page_content = remove_newlines(document.page_content)
# Split the cleaned documents into overlapping character chunks so each
# embedding request stays within the model's context limits.
text_splitter = CharacterTextSplitter(chunk_size =1000, chunk_overlap = 20, separator=" ")
documents = text_splitter.split_documents(documents)
# Report how many chunks were produced.  The bare `documents[0]`
# expression that followed was a no-op notebook leftover (it only prints
# in a REPL) and has been removed.
print(len(documents))
# Embedding model: OpenAI's embedding API (each call incurs per-token cost).
embeddings = OpenAIEmbeddings()
# Convert all document chunks into embeddings and index them in an
# in-memory Chroma vector store.
vectorstore = Chroma.from_documents(documents, embeddings)
# Retriever: plain similarity search returning the k=5 most similar chunks.
# (The original comment said "top6"; the configured value is 5.)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs = {"k":5})
# Conversational QA chain: condenses question + chat history, then answers
# from the retrieved chunks using the completion model.
qa = ConversationalRetrievalChain.from_llm(OpenAI(model_name = "text-davinci-003"), retriever)
# Module-level history list.  NOTE(review): the Gradio callback below
# receives its own history argument, so this list appears unused.
chat_history = []
import gradio as gr

# Gradio UI: a chatbot panel, a textbox for questions, and a clear button.
with gr.Blocks() as demo:
    # Static header shown above the chat widget (German, user-facing).
    gr.Markdown(
        """
        # M&L Projekt: Human Search - Wissensdatenbanken in natürlicher Sprache anfragen und erhalten!
        Ich bin eine generative KI und mir steht das Wissen der M&L Webseite bis Mai 2023 zur Verfügung.
        Frage mich etwas! Z.b. Was ist die Targetmatrix? Wen oder was sponsort die M&L? Wann wurde die M&L gegründet? Uvm.
        Bitte beachte: Ich bin ein erster Protoyp und mein Können wird noch ausgebaut!
        """
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(user_message, chat_history):
        """Handle one chat turn: query the QA chain and update the history.

        Args:
            user_message: The question typed into the textbox.
            chat_history: Prior (user, answer) turns as delivered by the
                Chatbot component (lists of pairs).

        Returns:
            A pair of ("" to clear the textbox, updated chat history).
        """
        print(user_message)
        # Gradio delivers history entries as [user, bot] lists; the chain
        # expects (user, bot) tuples, so convert before calling it.
        if chat_history:
            chat_history = [tuple(sublist) for sublist in chat_history]
        print(chat_history)
        # Get response from QA chain
        response = qa({"question": user_message, "chat_history": chat_history})
        # Append user message and response to chat history
        chat_history.append((user_message, response["answer"]))
        print(chat_history)
        return "", chat_history

    # Submitting the textbox triggers the QA turn; the button resets the chat.
    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)