Spaces: runtime error
# --- Environment & retrieval-chain setup -------------------------------------
from dotenv import load_dotenv

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file
# BEFORE instantiating anything that reads them at construction time.
load_dotenv()

from langchain.embeddings import OpenAIEmbeddings  # creates text embeddings
from langchain.vectorstores import Chroma          # local persistent vector DB
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI       # chat-completions LLM wrapper
import gradio as gr

# Number of source-document boxes shown in the UI; matches the retriever's
# default top-k of 4.
max_sources = 4
DB_DIR = "chroma"  # directory holding the persisted Chroma index

embedding = OpenAIEmbeddings()
vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embedding)

# BUG FIX: "gpt-3.5-turbo" is a *chat* model. The legacy `OpenAI` wrapper
# targets the completions endpoint and fails at runtime with chat models —
# use `ChatOpenAI` instead.
pdf_qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.9, model_name="gpt-3.5-turbo"),
    vectordb.as_retriever(),
    return_source_documents=True,
)
def chat_pdf(query, chat_history=""):
    """Answer `query` against the indexed PDFs and return the UI outputs.

    Parameters
    ----------
    query : str
        The user's question.
    chat_history : list[tuple[str, str]] | str
        Prior (question, answer) pairs; an empty value starts a fresh chat.

    Returns
    -------
    list[str]
        ``[answer, source_1, ..., source_max_sources]`` — always exactly
        ``1 + max_sources`` entries, matching the Gradio output components.
    """
    result = pdf_qa({"question": query, "chat_history": chat_history})
    answer = result["answer"]
    source_docs = result["source_documents"]
    print("source_docs", len(source_docs))

    # Prefix each source with its metadata so users can trace the citation.
    cleaned_docs = [
        f"Metadata: {doc.metadata}\n" + doc.page_content for doc in source_docs
    ]

    # BUG FIX: if the retriever ever returns more than `max_sources`
    # documents, the original code emitted too many outputs and crashed the
    # Gradio callback — truncate before padding.
    cleaned_docs = cleaned_docs[:max_sources]

    # Pad so the list length always matches the interface's output count.
    return [answer] + cleaned_docs + [""] * (max_sources - len(cleaned_docs))
def create_outputs(num_sources):
    """Build the Gradio output components: one answer box plus one per source.

    Parameters
    ----------
    num_sources : int
        Number of source-document textboxes to create.

    Returns
    -------
    list
        ``1 + num_sources`` Textbox components, answer first.
    """
    # BUG FIX: `gr.outputs.Textbox` was removed in Gradio 3.x+ and raises at
    # runtime; components now live directly on the `gr` namespace.
    outputs = [gr.Textbox(label="Answer")]
    outputs.extend(
        gr.Textbox(label=f"Source Document {i}")
        for i in range(1, num_sources + 1)
    )
    return outputs
# Wire the QA function into a simple Gradio UI: one query box in, the answer
# plus up to `max_sources` source-document boxes out.
iface = gr.Interface(
    fn=chat_pdf,
    # BUG FIX: `gr.inputs.Textbox` was removed in Gradio 3.x+; use the
    # top-level `gr.Textbox` component instead.
    inputs=[gr.Textbox(label="Query")],
    outputs=create_outputs(max_sources),
    examples=[
        ["Give 2 species of fulgoroidea"],
        ["What colors are found among fulgoroidea?"],
        ["Why are fulgoroidea so cute?"],
    ],
)

# debug=True surfaces tracebacks in the UI — useful while iterating on Spaces.
iface.launch(debug=True)