Spaces:
Sleeping
Sleeping
| import os | |
| import pickle | |
| import time | |
| import gradio as gr | |
| from langchain import OpenAI | |
| from langchain.chains import RetrievalQAWithSourcesChain | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import UnstructuredURLLoader | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from dotenv import load_dotenv | |
| load_dotenv() # take environment variables from .env (especially openai api key) | |
def process_and_query(url1, url2, url3, query):
    """Answer a question using the text of up to three web pages.

    The pages are fetched with ``UnstructuredURLLoader``, split into chunks
    (``chunk_size=1000``), embedded with ``OpenAIEmbeddings`` into an
    in-memory FAISS index, and queried through
    ``RetrievalQAWithSourcesChain``.

    Args:
        url1, url2, url3: Page URLs as strings. Blank or ``None`` entries
            are ignored, so fewer than three URLs may be supplied.
        query: The question to ask about the pages.

    Returns:
        A ``(answer, sources_list)`` tuple: the answer text and a list of
        the non-blank lines of the chain's ``"sources"`` output. When no
        URL or no question is given, or no text could be extracted, the
        answer is a short explanatory message and the list is empty.
    """
    # Unused URL boxes arrive as empty strings; do not hand them to the loader.
    urls = [u.strip() for u in (url1, url2, url3) if u and u.strip()]
    if not urls:
        return "Please enter at least one URL.", []

    question = (query or "").strip()
    if not question:
        return "Please enter a question.", []

    llm = OpenAI(temperature=0.9, max_tokens=500)

    # Load data
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    # Split data
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    docs = text_splitter.split_documents(data)
    if not docs:
        # Nothing to index; building an index from zero documents is pointless.
        return "No readable text could be extracted from the given URLs.", []

    # Build the index and query it straight from memory. The previous version
    # pickled the store to "faiss_store_openai.pkl" and immediately unpickled
    # it again, which added nothing and made every concurrent request share
    # (and overwrite) the same file.
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Process the query
    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=llm, retriever=vectorstore.as_retriever()
    )
    result = chain({"question": question}, return_only_outputs=True)
    answer = result["answer"]

    # Extract and format sources: one entry per non-blank line.
    sources = result.get("sources", "")
    sources_list = (
        [line.strip() for line in sources.split("\n") if line.strip()]
        if sources
        else []
    )
    return answer, sources_list
# Gradio UI: three URL boxes and a question box feed process_and_query;
# its two return values are shown in the "Answer" and "Sources" boxes.
url1_input = gr.Textbox(label="URL 1")
url2_input = gr.Textbox(label="URL 2")
url3_input = gr.Textbox(label="URL 3")
query_input = gr.Textbox(label="Question")

output_text = gr.Textbox(label="Answer")
output_sources = gr.Textbox(label="Sources")

# Page header text, kept apart from the wiring below for readability.
APP_TITLE = "RockyBot: News Research Tool 📈"
APP_DESCRIPTION = (
    "Enter up to three news article URLs and ask a question. "
    "The bot will process the articles and provide an answer along with the sources."
)

interface = gr.Interface(
    fn=process_and_query,
    inputs=[url1_input, url2_input, url3_input, query_input],
    outputs=[output_text, output_sources],
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()