"""Gradio web app that answers questions about an uploaded file.

The uploaded file is split into token chunks, embedded with OpenAI
embeddings into a Chroma vector store, and queried through a LangChain
``RetrievalQA`` chain backed by ``gpt-3.5-turbo``.
"""
import os
import shutil
import tempfile

import openai
import langchain
import nltk
import gradio as gr
from datasets import load_dataset
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader

# The key is read from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

disclaimer = """ 注意事項及免責事項 Disclaimer and Precautions. """

# Configure HuggingFace repository
# NOTE(review): "your-username/your-repo-name" is a placeholder and must be
# replaced with a real repository before the dataset can be saved.
repo_path = "your-username/your-repo-name"
persist_directory = f"hf://{repo_path}/data/"

# Retrieval QA chain built from the most recently processed upload.
# It stays None until process_uploaded_file() has succeeded at least once.
model = None


def _read_uploaded_bytes(file) -> bytes:
    """Return the raw content of an upload value.

    Accepts raw bytes, a filesystem path, or a file-like object exposing
    ``read()``, so the app does not depend on which of these forms the
    upload component hands to the callback.
    """
    if isinstance(file, (bytes, bytearray)):
        return bytes(file)
    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as handle:
            return handle.read()
    content = file.read()
    # A handle opened in text mode yields str; the temp copy is binary.
    if isinstance(content, str):
        return content.encode("utf-8")
    return content


# Function for processing uploaded file
def process_uploaded_file(file) -> None:
    """Index an uploaded file and rebuild the global QA chain.

    The upload is copied to a temporary file, stored as a ``text`` dataset
    under ``persist_directory``, split into token chunks, embedded, and
    wrapped in a ``RetrievalQA`` chain that is assigned to the module-level
    ``model``. Nothing happens when ``file`` is None.

    Args:
        file: The value supplied by the upload component (a file-like
            object, a path, or raw bytes), or None if nothing was uploaded.
    """
    global model

    if file is None:
        return

    payload = _read_uploaded_bytes(file)

    # Close the temporary copy before other libraries reopen it by name.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(payload)
        tmp_path = tmp_file.name

    try:
        dataset = load_dataset("text", data_files=tmp_path, split="train")
        dataset.save_to_disk(persist_directory)

        # Load the uploaded document itself: the directory written by
        # save_to_disk() holds the serialized dataset, not a document the
        # loader can parse.
        uploaded_doc = UnstructuredFileLoader(tmp_path).load()
    finally:
        # delete=False means the temporary copy must be removed explicitly.
        os.remove(tmp_path)

    text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=30)
    uploaded_docs = text_splitter.split_documents(uploaded_doc)

    embeddings = OpenAIEmbeddings()
    vector_store = Chroma.from_documents(uploaded_docs, embeddings)

    model = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(
            temperature=0.5,
            model_name="gpt-3.5-turbo",
            max_tokens=256,
        ),
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )


# Define the function
def askandanswer(question, language, uploaded_file):
    """Answer ``question`` about the uploaded file in the chosen language.

    Args:
        question: The user's query text.
        language: The label selected in the language dropdown.
        uploaded_file: The upload component's value, or None.

    Returns:
        The chain's answer, or a short notice asking for a file when no
        document has been indexed yet.
    """
    # The upload is re-indexed on every call, so each question triggers a
    # fresh embedding pass over the document.
    process_uploaded_file(uploaded_file)

    if model is None:
        # No upload has been processed yet, so there is nothing to query.
        return "请先上传文件。 Please upload a file first."

    return model.run(
        "请创建一个简单的回答" + language + "问题。 [问题] " + question
    )


# Create a web application
app = gr.Interface(
    fn=askandanswer,
    inputs=[
        gr.Textbox(placeholder="请输入查询"),
        gr.Dropdown(["中文 Chinese", "英语 English"], label="语言 Language"),
        gr.UploadButton(),
    ],
    outputs="text",
    title="文件的聊天知音",
    description="这是一个可以和任何文件进行理解的助手",
    article=disclaimer,
)

# Launch the web app
app.launch()