# Semantic_Search / app.py
# (Hugging Face Spaces metadata: uploaded by Nigz, commit 0b16223, verified)
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import (
PyPDFLoader, TextLoader, Docx2txtLoader, UnstructuredHTMLLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
import os
import gradio as gr
load_dotenv()
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0.5)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
VECTOR_STORE_PATH = "faiss_store_openai"
def load_any_file(file_path):
ext = os.path.splitext(file_path)[1].lower()
if ext == ".pdf":
loader = PyPDFLoader(file_path)
elif ext == ".txt":
loader = TextLoader(file_path)
elif ext == ".docx":
loader = Docx2txtLoader(file_path)
elif ext in [".html", ".htm"]:
loader = UnstructuredHTMLLoader(file_path)
else:
raise ValueError(f"Unsupported file type: {ext}")
return loader.load()
def process_inputs(url, file):
data = []
if url:
loader = UnstructuredURLLoader(urls=[url])
data.extend(loader.load())
if file:
upload_file_path = file.name
data.extend(load_any_file(upload_file_path))
if not data:
return "Please provide a URL or a file to process.", gr.update(visible=False), gr.update(visible=False)
splitter = RecursiveCharacterTextSplitter(
separators=['\n\n', '\n', '.', ','],
chunk_size=1000
)
docs = splitter.split_documents(data)
vectorstore = FAISS.from_documents(docs, embeddings)
vectorstore.save_local(VECTOR_STORE_PATH)
return "βœ… Documents processed successfully! Please switch to the 'Ask a Question' tab.", gr.update(visible=True), gr.update(visible=True)
def answer_question(query):
if not os.path.exists(f"{VECTOR_STORE_PATH}/index.faiss"):
return "No Data found. Please upload a document or URL first.", ""
vectorstore = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
result = chain({"question": query}, return_only_outputs=True)
return result.get("answer", "No answer generated."), result.get("sources", "No sources found.")
with gr.Blocks(title="InfoSEARCH") as demo:
gr.Markdown("""
# 🧾 InfoSEARCH
Upload a document or provide a URL. Ask anything from the content.
""")
with gr.Tab("πŸ“„ Upload or Link"):
with gr.Row():
url_input = gr.Textbox(label="Upload URL", placeholder="Paste a news article URL")
file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".docx", ".html", ".htm"])
process_btn = gr.Button("πŸ“₯ Process Input")
process_status = gr.Textbox(label="Status", interactive=False)
jump_notice = gr.Textbox(visible=False, interactive=False)
with gr.Tab("❓ Ask a Question"):
query_input = gr.Textbox(label="Ask a question", placeholder="Type your question here and hit Enter")
answer_output = gr.Textbox(label="🧾 Answer", lines=4)
sources_output = gr.Textbox(label="πŸ”— Sources", lines=3)
process_btn.click(
fn=process_inputs,
inputs=[url_input, file_input],
outputs=[process_status, jump_notice, query_input]
)
query_input.submit(fn=answer_question, inputs=query_input, outputs=[answer_output, sources_output])
demo.launch()