10tenfirestorm committed on
Commit
ea8cecd
·
verified ·
1 Parent(s): c9fce51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_community.document_loaders import WebBaseLoader, PyMuPDFLoader
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.llms import HuggingFaceHub
7
+ from langchain.chains.question_answering import load_qa_chain
8
+
9
# --- CONFIGURATION ---
# Read the Hugging Face API token from the Space's secret environment variables.
hf_token = os.getenv("HF_TOKEN")

# Fail fast at startup: the app cannot call the Hub inference API without a token.
if not hf_token:
    raise ValueError("HF_TOKEN not found in environment variables. Please set it in Space Settings.")
15
+
16
+ # --- LOGIC ---
17
def load_pdf(file_path):
    """Parse the PDF at *file_path* into a list of LangChain documents."""
    return PyMuPDFLoader(file_path).load()
21
+
22
def load_website(url):
    """Fetch the page at *url* and return its content as LangChain documents."""
    return WebBaseLoader(url).load()
26
+
27
def setup_vector_store(docs):
    """Embed *docs* and index them in an in-memory FAISS vector store.

    Uses a small sentence-transformer embedding model that runs fine on CPU.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
32
+
33
def ask_question(query, vector_store):
    """Answer *query* from the documents indexed in *vector_store*.

    Retrieves the most relevant documents, then feeds them with the
    question to a hosted Mixtral model via the Hugging Face Hub.
    """
    # Pull the documents closest to the query out of the index.
    relevant_docs = vector_store.as_retriever().get_relevant_documents(query)

    # Mixtral served through the Hugging Face Hub inference API.
    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.7, "max_length": 512},
        huggingfacehub_api_token=hf_token,
    )

    # "stuff" chain: all retrieved documents are concatenated into one prompt.
    qa_chain = load_qa_chain(llm, chain_type="stuff")
    return qa_chain.run(input_documents=relevant_docs, question=query)
47
+
48
def process_input(weblink, pdf_file, question):
    """Gradio handler: index the given source(s) and answer *question*.

    Parameters
    ----------
    weblink : str or None
        Optional website URL to load content from.
    pdf_file : file-like, str, or None
        Optional uploaded PDF. Depending on the Gradio version/config this
        is either a tempfile-like object (with ``.name``) or a plain path
        string — both are accepted.
    question : str
        The question to answer from the loaded content.

    Returns
    -------
    str
        The model's answer, or a human-readable error message.
    """
    # Error handling for empty inputs
    if not weblink and not pdf_file:
        return "Please provide a website link or upload a PDF."
    if not question:
        return "Please ask a question."

    docs = []
    try:
        if weblink:
            docs.extend(load_website(weblink))
        if pdf_file:
            # Accept both a Gradio tempfile object (.name) and a filepath
            # string (newer Gradio returns str for gr.File by default).
            pdf_path = getattr(pdf_file, "name", pdf_file)
            docs.extend(load_pdf(pdf_path))

        # Guard against sources that produced no content: FAISS cannot
        # build an index from an empty document list.
        if not docs:
            return "No readable content was found in the provided sources."

        vector_store = setup_vector_store(docs)
        return ask_question(question, vector_store)
    except Exception as e:  # UI boundary: surface the failure instead of crashing the app
        return f"An error occurred: {str(e)}"
68
+
69
+ # --- INTERFACE ---
70
# Wire the handler into a simple three-input, one-output Gradio UI.
_inputs = [
    gr.Textbox(label="Website Link (Optional)"),
    gr.File(label="Upload PDF (Optional)"),
    gr.Textbox(label="Ask a Question"),
]

demo = gr.Interface(
    fn=process_input,
    inputs=_inputs,
    outputs=gr.Textbox(label="Final Answer"),
    title="Web & PDF QA System",
    description="Upload a PDF or enter a website URL to chat with the content.",
)
81
+
82
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()
84
+