| | import gradio as gr |
| | import random |
| | import os |
| | import re |
| | import json |
| | from openai import OpenAI |
| |
|
| | from langchain_openai import ChatOpenAI |
| | from langchain_community.document_loaders.csv_loader import CSVLoader |
| | from langchain_community.document_loaders import JSONLoader |
| | from langchain_community.document_loaders import PyPDFLoader |
| |
|
| | from langchain.text_splitter import RecursiveCharacterTextSplitter |
| | from langchain_community.vectorstores import FAISS |
| | from langchain_openai import OpenAIEmbeddings |
| | from langchain.chains import RetrievalQA |
| |
|
# SECURITY: the OpenAI API key is read from the environment and must never be
# written into source code. Any key that has ever been committed to a
# repository should be treated as leaked and rotated.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    # Fail fast with an actionable message instead of a late authentication
    # error on the first request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "starting the app."
    )

# Direct OpenAI client, configured with the key taken from the environment.
client = OpenAI(
    api_key=_api_key,
)
| |
|
| | |
# --- Knowledge-base loading -------------------------------------------------
# Source 1: data.json, read through JSONLoader with the jq expression ".[]"
# and text_content=False.
json_loader = JSONLoader(file_path="data.json", jq_schema=".[]", text_content=False)
json_doc = json_loader.load()

# Source 2: paper.pdf, loaded and then split into chunks of at most 1000
# characters with a 50-character overlap between neighbouring chunks.
pdf_loader = PyPDFLoader("./paper.pdf")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
pdf_doc = text_splitter.split_documents(pdf_loader.load())

# Combined corpus: JSON documents first, followed by the PDF chunks.
documents = json_doc + pdf_doc
| |
|
| | |
# --- Retrieval pipeline -----------------------------------------------------
# Build a FAISS vector store from the combined documents using OpenAI
# embeddings, and expose it as a retriever with default settings.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents, embeddings)
retriever = vector_store.as_retriever()

# Chat model used to produce the answers.
llm = ChatOpenAI(model="gpt-4o")

# Retrieval QA chain of type "stuff"; the chain is also asked to return the
# source documents alongside the answer.
_qa_chain_options = {
    "llm": llm,
    "retriever": retriever,
    "chain_type": "stuff",
    "return_source_documents": True,
}
qa_chain = RetrievalQA.from_chain_type(**_qa_chain_options)
| |
|
| |
|
# Example prompts offered in the chat UI, in display order.
_NUMBERED_PROMPTS = (
    "How do I compare gene expression across different models?",
    "How do I filter datasets by genotype?",
    "Can I analyze PCA plots to explore differences between NSCLC and SCLC?",
    "How can I compare primary and metastatic samples?",
    "Where can I download reprocessed gene expression data?",
)

# Keys are "Q1".."Q5" for the numbered prompts, plus a trailing "Other" entry.
question = {
    f"Q{number}": prompt
    for number, prompt in enumerate(_NUMBERED_PROMPTS, start=1)
}
question["Other"] = "Other questions."
| |
|
def openai(message, history):
    """Answer one chat message using the retrieval QA chain.

    Args:
        message: The user's latest message; sent to the chain as its "query".
        history: Earlier conversation turns passed in by the chat interface.
            Not used here; each message is answered on its own.

    Returns:
        The value stored under "result" in the chain's response.
    """
    return qa_chain.invoke({"query": message})["result"]
| |
|
# Custom CSS for the chat page, assembled from one rule per line; the
# concatenated value is a single space-separated string of rules.
_CHAT_CSS = (
    ".svelte-i3tvor {visibility: hidden} "
    "footer {visibility: hidden} "
    ".placeholder-content.svelte-lykdgn.svelte-lykdgn {justify-content: flex-end; flex-direction: row} "
    ".examples.svelte-lykdgn.svelte-lykdgn { display: block; margin: 0 } "
    ".example.svelte-lykdgn.svelte-lykdgn{max-width: 300px; margin-bottom: 15px}"
)

# Example prompts shown in the UI, in this fixed order.
_EXAMPLE_KEYS = ("Q1", "Q2", "Q3", "Q4", "Q5", "Other")

# Chat interface that routes every user message to `openai`.
demo = gr.ChatInterface(
    fn=openai,
    type="messages",
    chatbot=gr.Chatbot(),
    title="test bot",
    theme="ocean",
    examples=[question[key] for key in _EXAMPLE_KEYS],
    css=_CHAT_CSS,
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
| |
|