from langchain_community.document_loaders import (
    UnstructuredURLLoader,
    PyPDFLoader,
    TextLoader,
    Docx2txtLoader,
    UnstructuredHTMLLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
import os
import gradio as gr

# Load GOOGLE_API_KEY (and any other settings) from a local .env file.
load_dotenv()

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0.5)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

VECTOR_STORE_PATH = "faiss_store_openai"


def load_any_file(file_path):
    """Pick a loader based on the file extension and return its documents."""
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        loader = PyPDFLoader(file_path)
    elif ext == ".txt":
        loader = TextLoader(file_path)
    elif ext == ".docx":
        loader = Docx2txtLoader(file_path)
    elif ext in [".html", ".htm"]:
        loader = UnstructuredHTMLLoader(file_path)
    else:
        raise ValueError(f"Unsupported file type: {ext}")
    return loader.load()


def process_inputs(url, file):
    """Load the URL and/or uploaded file, split it into chunks, and build the FAISS index."""
    data = []
    if url:
        loader = UnstructuredURLLoader(urls=[url])
        data.extend(loader.load())
    if file:
        upload_file_path = file.name
        data.extend(load_any_file(upload_file_path))
    if not data:
        return (
            "Please provide a URL or a file to process.",
            gr.update(visible=False),
            gr.update(visible=False),
        )

    # Split on paragraph, line, sentence, then clause boundaries.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", ","],
        chunk_size=1000,
    )
    docs = splitter.split_documents(data)

    # Embed the chunks and persist the FAISS index to disk.
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(VECTOR_STORE_PATH)

    return (
        "✅ Documents processed successfully! Please switch to the 'Ask a Question' tab.",
        gr.update(visible=True),
        gr.update(visible=True),
    )


def answer_question(query):
    """Answer a question against the saved FAISS index, returning the answer and its sources."""
    if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
        return "No data found. Please upload a document or URL first.", ""
    vectorstore = FAISS.load_local(
        VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True
    )
    chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
    result = chain({"question": query}, return_only_outputs=True)
    return (
        result.get("answer", "No answer generated."),
        result.get("sources", "No sources found."),
    )


with gr.Blocks(title="InfoSEARCH") as demo:
    gr.Markdown(
        """
        # 🧾 InfoSEARCH
        Upload a document or provide a URL, then ask anything about its content.
        """
    )

    with gr.Tab("📄 Upload or Link"):
        with gr.Row():
            url_input = gr.Textbox(label="URL", placeholder="Paste a web page or article URL")
            file_input = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".txt", ".docx", ".html", ".htm"],
            )
        process_btn = gr.Button("📥 Process Input")
        process_status = gr.Textbox(label="Status", interactive=False)
        # Hidden helper textbox that becomes visible once processing succeeds.
        jump_notice = gr.Textbox(visible=False, interactive=False)

    with gr.Tab("❓ Ask a Question"):
        query_input = gr.Textbox(
            label="Ask a question", placeholder="Type your question here and hit Enter"
        )
        answer_output = gr.Textbox(label="🧾 Answer", lines=4)
        sources_output = gr.Textbox(label="🔗 Sources", lines=3)

    process_btn.click(
        fn=process_inputs,
        inputs=[url_input, file_input],
        outputs=[process_status, jump_notice, query_input],
    )
    query_input.submit(
        fn=answer_question, inputs=query_input, outputs=[answer_output, sources_output]
    )

if __name__ == "__main__":
    demo.launch()