# Spaces: Sleeping
| import os | |
| import openai | |
| import langchain | |
| import nltk | |
| import gradio as gr | |
| import shutil | |
| import tempfile | |
| from datasets import load_dataset | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.text_splitter import TokenTextSplitter | |
| from langchain.chains import RetrievalQA | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.document_loaders import UnstructuredFileLoader | |
# Take the OpenAI credential from the environment; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Notice text handed to the Gradio interface as its `article`.
disclaimer = "\n注意事項及免責事項 Disclaimer and Precautions.\n"

# Hugging Face repository settings. The repo id below looks like a placeholder
# that must be replaced with a real "<user>/<repo>" before deployment.
repo_path = "your-username/your-repo-name"
persist_directory = "hf://" + repo_path + "/data/"
def _read_upload(file):
    """Return ``(payload, suffix)`` for an upload given as a path, bytes, or file-like object."""
    if isinstance(file, (str, os.PathLike)):
        path = os.fsdecode(file)
        with open(path, "rb") as source:
            return source.read(), os.path.splitext(path)[1]
    if isinstance(file, (bytes, bytearray)):
        # Raw bytes carry no file name, so there is no extension to keep.
        return bytes(file), ""
    name = getattr(file, "name", "")
    suffix = os.path.splitext(name)[1] if isinstance(name, str) else ""
    return file.read(), suffix


def process_uploaded_file(file):
    """Index an uploaded file and rebuild the global question-answering chain.

    The upload is copied to a temporary file, saved as a line-based text
    dataset under ``persist_directory``, parsed with ``UnstructuredFileLoader``,
    split into token chunks, embedded into a Chroma store, and finally wrapped
    in a ``RetrievalQA`` chain that is published as the module-level ``model``.

    Args:
        file: The upload. May be ``None`` (nothing happens), a filesystem
            path, raw ``bytes``, or a file-like object exposing ``read()``
            (and optionally ``name``).

    Returns:
        None. The result is the side effect of (re)binding the global
        ``model``.
    """
    global model

    if file is None:
        return

    payload, suffix = _read_upload(file)

    # delete=False lets the loaders reopen the file by name after this handle
    # is closed; the file is removed explicitly in the ``finally`` below.
    # Keeping the original extension helps file-type detection.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(payload)
        tmp_path = tmp_file.name

    try:
        # Persist a copy of the upload as a text dataset.
        # NOTE(review): the "text" builder reads the file line by line, so
        # this step presumably only suits plain-text uploads; confirm.
        dataset = load_dataset("text", data_files=tmp_path, split="train")
        dataset.save_to_disk(persist_directory)

        # Parse the uploaded file itself. The loader expects a document path,
        # not the directory the dataset was just saved to.
        loader = UnstructuredFileLoader(tmp_path)
        uploaded_doc = loader.load()
    finally:
        # Do not leak one temporary file per upload.
        os.remove(tmp_path)

    text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=30)
    uploaded_docs = text_splitter.split_documents(uploaded_doc)

    embeddings = OpenAIEmbeddings()
    vStore = Chroma.from_documents(uploaded_docs, embeddings)

    model = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=256),
        chain_type="stuff",
        retriever=vStore.as_retriever(),
    )
def askandanswer(question, language, uploaded_file):
    """Answer *question* about the uploaded file in the requested language.

    Args:
        question: The user's query text.
        language: The language label chosen in the dropdown; ``None`` when
            nothing is selected.
        uploaded_file: The upload forwarded to ``process_uploaded_file``;
            ``None`` when no file was provided with this request.

    Returns:
        The chain's answer as a string, or a short notice asking for a file
        when no document has been indexed yet.
    """
    # Every call that carries a file re-indexes it and replaces the global chain.
    process_uploaded_file(uploaded_file)

    # ``model`` only exists once a file has been processed; look it up
    # defensively so a question asked before any upload does not raise NameError.
    qa_chain = globals().get("model")
    if qa_chain is None:
        return "请先上传文件。 Please upload a file first."

    # An unselected dropdown yields None, which cannot be concatenated to str.
    prompt = "请创建一个简单的回答" + (language or "") + "问题。 [问题] " + (question or "")
    return qa_chain.run(prompt)
# Assemble the web UI: a query box, a language selector, and a file upload
# button feed `askandanswer`; the answer is rendered as plain text.
_question_box = gr.Textbox(placeholder="请输入查询")
_language_picker = gr.Dropdown(["中文 Chinese", "英语 English"], label="语言 Language")
_upload_button = gr.UploadButton()

app = gr.Interface(
    fn=askandanswer,
    inputs=[_question_box, _language_picker, _upload_button],
    outputs="text",
    title="文件的聊天知音",
    description="这是一个可以和任何文件进行理解的助手",
    article=disclaimer,
)

# Start serving the interface.
app.launch()