Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import UnstructuredURLLoader | |
| from langchain_community.document_loaders import ( | |
| PyPDFLoader, TextLoader, Docx2txtLoader, UnstructuredHTMLLoader | |
| ) | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from dotenv import load_dotenv | |
| import os | |
| import gradio as gr | |
# Load key=value pairs from a local .env file into the process environment
# before the Google clients below are constructed.
# NOTE(review): presumably this is where the Google API key comes from - confirm.
load_dotenv()

# Chat model handed to the question-answering chain.
llm = ChatGoogleGenerativeAI(
    temperature=0.5,
    model="gemini-2.0-flash-001",
)

# Embedding model; the same instance is used to build and to reload the index.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Directory in which the FAISS index is saved and from which it is reloaded.
# NOTE(review): the name mentions "openai" although the embeddings are Google's;
# left unchanged because renaming would orphan an already-saved index.
VECTOR_STORE_PATH = "faiss_store_openai"
def load_any_file(file_path):
    """Load a local document with the loader matching its file extension.

    The extension is compared case-insensitively. Supported extensions are
    ``.pdf``, ``.txt``, ``.docx``, ``.html`` and ``.htm``.

    Args:
        file_path: Path of the file to read.

    Returns:
        Whatever the selected loader's ``load()`` call returns.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # Guard-clause dispatch: each branch builds its loader and loads at once.
    if suffix == ".pdf":
        return PyPDFLoader(file_path).load()
    if suffix == ".txt":
        return TextLoader(file_path).load()
    if suffix == ".docx":
        return Docx2txtLoader(file_path).load()
    if suffix in (".html", ".htm"):
        return UnstructuredHTMLLoader(file_path).load()

    raise ValueError(f"Unsupported file type: {suffix}")
def process_inputs(url, file):
    """Build and persist a FAISS index from a URL and/or an uploaded file.

    Documents from both inputs are combined, split into chunks, embedded
    with the module-level ``embeddings`` and saved under
    ``VECTOR_STORE_PATH``.

    Args:
        url: Text from the URL box. ``None``, empty, or whitespace-only
            means no URL was given.
        file: Value delivered by the file input, or ``None``. May be an
            object exposing its path as ``.name`` or a plain path string.

    Returns:
        A 3-tuple ``(status_message, update, update)`` where both updates
        are ``gr.update(visible=...)``: visible on success, hidden
        otherwise.
    """
    import logging  # function-scope: only needed on the failure path

    def _status(message, *, ready):
        # All exits share the same shape; only the text and visibility vary.
        return message, gr.update(visible=ready), gr.update(visible=ready)

    # A whitespace-only textbox value must not be treated as a URL.
    url = (url or "").strip()

    data = []
    try:
        if url:
            data.extend(UnstructuredURLLoader(urls=[url]).load())
        if file:
            # Accept either an upload object with ``.name`` or a bare path.
            upload_file_path = getattr(file, "name", file)
            data.extend(load_any_file(upload_file_path))
    except Exception as err:  # UI boundary: report in the status box
        logging.getLogger(__name__).exception("Failed to load input")
        return _status(f"Failed to load input: {err}", ready=False)

    if not data:
        return _status("Please provide a URL or a file to process.", ready=False)

    splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    docs = splitter.split_documents(data)

    # Documents with no extractable text yield zero chunks; building an
    # index from an empty list would fail, so stop with a clear message.
    if not docs:
        return _status("No readable text was found in the provided input.", ready=False)

    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(VECTOR_STORE_PATH)
    return _status(
        "β Documents processed successfully! Please switch to the 'Ask a Question' tab.",
        ready=True,
    )
def answer_question(query):
    """Answer a question using the FAISS index saved under VECTOR_STORE_PATH.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only
            input is rejected before any index or model access.

    Returns:
        A 2-tuple ``(answer, sources)`` of strings. When the query is blank
        or no index has been saved yet, ``answer`` carries the explanatory
        message and ``sources`` is an empty string.
    """
    # Reject blank questions up front instead of spending a model call.
    if not query or not query.strip():
        return "Please enter a question.", ""

    index_file = os.path.join(VECTOR_STORE_PATH, "index.faiss")
    if not os.path.exists(index_file):
        return "No Data found. Please upload a document or URL first.", ""

    # SECURITY: load_local deserializes pickled data, hence the explicit
    # opt-in flag. Only safe while the directory is written solely by this
    # app; do not point VECTOR_STORE_PATH at untrusted content.
    vectorstore = FAISS.load_local(
        VECTOR_STORE_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
    )
    # invoke() replaces the deprecated direct call on the chain object.
    result = chain.invoke({"question": query}, return_only_outputs=True)

    # Use ``or`` rather than a dict.get default: a key that is present with
    # an empty value would otherwise bypass the fallback text.
    answer = (result.get("answer") or "").strip() or "No answer generated."
    sources = (result.get("sources") or "").strip() or "No sources found."
    return answer, sources
# Two-tab Gradio UI: the first tab ingests a URL and/or file, the second asks
# questions against what was ingested.
# NOTE(review): the nesting below is reconstructed - the source as received
# carried no indentation. Confirm the Row/Tab grouping against the live app.
with gr.Blocks(title="InfoSEARCH") as demo:
    gr.Markdown("""
# π§Ύ InfoSEARCH
Upload a document or provide a URL. Ask anything from the content.
""")

    with gr.Tab("π Upload or Link"):
        with gr.Row():
            url_input = gr.Textbox(label="Upload URL", placeholder="Paste a news article URL")
            file_input = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".txt", ".docx", ".html", ".htm"],
            )
        process_btn = gr.Button("π₯ Process Input")
        process_status = gr.Textbox(label="Status", interactive=False)
        # Hidden textbox whose visibility is toggled by process_inputs.
        jump_notice = gr.Textbox(visible=False, interactive=False)

    with gr.Tab("β Ask a Question"):
        query_input = gr.Textbox(label="Ask a question", placeholder="Type your question here and hit Enter")
        answer_output = gr.Textbox(label="π§Ύ Answer", lines=4)
        sources_output = gr.Textbox(label="π Sources", lines=3)

    # process_inputs returns (status text, visibility update, visibility
    # update), mapped onto these three outputs in order.
    process_btn.click(
        fn=process_inputs,
        inputs=[url_input, file_input],
        outputs=[process_status, jump_notice, query_input],
    )
    # Pressing Enter in the question box runs the query.
    query_input.submit(
        fn=answer_question,
        inputs=query_input,
        outputs=[answer_output, sources_output],
    )

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse `demo` or the handlers) has no launch side effect.
if __name__ == "__main__":
    demo.launch()