import os
import sys

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
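
# Chroma requires a newer sqlite3 than many hosts provide; swapping in
# pysqlite3-binary before the Chroma import is the usual workaround.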
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain.vectorstores import Chroma

import gradio as gr
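
# The OpenAI key is read from the OPENAPIKEY environment variable and exposed
# under the name the OpenAI client library expects.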
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
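
# Gather every PDF, Word, and plain-text file from multiple_docs/ as LangChain documents.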
docs = []
for f in os.listdir("multiple_docs"):
    if f.endswith(".pdf"):
        pdf_path = "./multiple_docs/" + f
        loader = PyPDFLoader(pdf_path)
        docs.extend(loader.load())
    elif f.endswith('.docx') or f.endswith('.doc'):
        doc_path = "./multiple_docs/" + f
        loader = Docx2txtLoader(doc_path)
        docs.extend(loader.load())
    elif f.endswith('.txt'):
        text_path = "./multiple_docs/" + f
        loader = TextLoader(text_path)
        docs.extend(loader.load())
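
# Split the documents into ~1000-character chunks with a 10-character overlap
# so retrieval returns focused passages.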
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)

# Convert the document chunks to embeddings and save them to the vector store.
vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./db")
vectorstore.persist()
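
# Conversational RAG chain: the chat history is condensed into a standalone
# question, and each answer is grounded in the 6 most similar chunks.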
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.1, model_name='gpt-3.5-turbo'),
    retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
    return_source_documents=True,
    verbose=False,
)

chat_history = []
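
# Gradio UI: a chatbot pane seeded with a greeting, a textbox for questions,
# and a button that clears the conversation.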
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Hello, I'm Thierry Decae's chatbot. You can ask me any recruitment-related questions, such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! You can chat with me directly in multiple languages.")],
        avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    chat_history = []

    def user(query, chat_history):
        # Convert the chat history to a list of (user, assistant) tuples.
        chat_history_tuples = []
        for message in chat_history:
            chat_history_tuples.append((message[0], message[1]))
        # Get the result from the QA chain.
        result = chain({"question": query, "chat_history": chat_history_tuples})
        # Append the user message and the answer to the chat history.
        chat_history.append((query, result["answer"]))
        # Clear the textbox and push the updated history to the Chatbot.
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)
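
# --- Alternative: fully local variant (SentenceTransformer embeddings + GPT-Neo
# generation, no OpenAI calls), kept commented out for reference ---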
# import os
# import sys
#
# from langchain.chains import ConversationalRetrievalChain
# from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# import gradio as gr
# from transformers import pipeline
# from sentence_transformers import SentenceTransformer
#
# __import__('pysqlite3')
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
#
# docs = []
# for f in os.listdir("multiple_docs"):
#     if f.endswith(".pdf"):
#         pdf_path = "./multiple_docs/" + f
#         loader = PyPDFLoader(pdf_path)
#         docs.extend(loader.load())
#     elif f.endswith('.docx') or f.endswith('.doc'):
#         doc_path = "./multiple_docs/" + f
#         loader = Docx2txtLoader(doc_path)
#         docs.extend(loader.load())
#     elif f.endswith('.txt'):
#         text_path = "./multiple_docs/" + f
#         loader = TextLoader(text_path)
#         docs.extend(loader.load())
#
# splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
# docs = splitter.split_documents(docs)
#
# # Extract the content from the documents and create embeddings.
# embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# texts = [doc.page_content for doc in docs]
# embeddings = embedding_model.encode(texts).tolist()  # Convert numpy arrays to lists
#
# # Create a Chroma vector store and add the documents and their embeddings.
# vectorstore = Chroma(persist_directory="./db", embedding_function=embedding_model.encode)
# vectorstore.add_texts(texts=texts, metadatas=[{"id": i} for i in range(len(texts))], embeddings=embeddings)
# vectorstore.persist()
#
# # Load the Hugging Face model for text generation.
# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
#
# class HuggingFaceLLMWrapper:
#     def __init__(self, generator):
#         self.generator = generator
#
#     def __call__(self, prompt, max_length=512):
#         result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
#         return result[0]['generated_text']
#
# llm = HuggingFaceLLMWrapper(generator)
#
# chain = ConversationalRetrievalChain.from_llm(
#     llm,
#     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
#     return_source_documents=True,
#     verbose=False,
# )
#
# chat_history = []
#
# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"])
#     msg = gr.Textbox()
#     clear = gr.Button("Clear")
#     chat_history = []
#
#     def user(query, chat_history):
#         # Convert chat history to a list of tuples
#         chat_history_tuples = []
#         for message in chat_history:
#             chat_history_tuples.append((message[0], message[1]))
#         # Get the result from the QA chain
#         result = chain({"question": query, "chat_history": chat_history_tuples})
#         # Append the user message and response to the chat history
#         chat_history.append((query, result["answer"]))
#         return gr.update(value=""), chat_history
#
#     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
#     clear.click(lambda: None, None, chatbot, queue=False)
#
# demo.launch(debug=True)