10tenfirestorm committed on
Commit
ea8cecd
·
verified ·
1 Parent(s): c9fce51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_community.document_loaders import WebBaseLoader, PyMuPDFLoader
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.llms import HuggingFaceHub
7
+ from langchain.chains.question_answering import load_qa_chain
8
+
9
# --- CONFIGURATION ---
# Read the Hugging Face API token from the Space's secret environment variables.
hf_token = os.getenv("HF_TOKEN")

# Fail fast at startup: the app cannot call the Hub inference API without a token.
if not hf_token:
    raise ValueError("HF_TOKEN not found in environment variables. Please set it in Space Settings.")
15
+
16
+ # --- LOGIC ---
17
def load_pdf(file_path):
    """Parse the PDF at *file_path* into a list of LangChain documents."""
    return PyMuPDFLoader(file_path).load()
21
+
22
def load_website(url):
    """Fetch the page at *url* and return its content as LangChain documents."""
    return WebBaseLoader(url).load()
26
+
27
def setup_vector_store(docs):
    """Embed *docs* and index them in an in-memory FAISS vector store.

    Uses a small sentence-transformer embedding model that runs fine on CPU.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
32
+
33
def ask_question(query, vector_store):
    """Answer *query* from the documents indexed in *vector_store*.

    Retrieves the most relevant documents, then feeds them with the
    question to a hosted Mixtral model via the Hugging Face Hub.
    """
    # Pull the documents closest to the query out of the index.
    relevant_docs = vector_store.as_retriever().get_relevant_documents(query)

    # Mixtral served through the Hugging Face Hub inference API.
    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.7, "max_length": 512},
        huggingfacehub_api_token=hf_token,
    )

    # "stuff" chain: all retrieved documents are concatenated into one prompt.
    qa_chain = load_qa_chain(llm, chain_type="stuff")
    return qa_chain.run(input_documents=relevant_docs, question=query)
47
+
48
def process_input(weblink, pdf_file, question):
    """Gradio handler: index the given source(s) and answer *question*.

    Parameters
    ----------
    weblink : str or None
        Optional website URL to load content from.
    pdf_file : file-like, str, or None
        Optional uploaded PDF. Depending on the Gradio version/config this
        is either a tempfile-like object (with ``.name``) or a plain path
        string — both are accepted.
    question : str
        The question to answer from the loaded content.

    Returns
    -------
    str
        The model's answer, or a human-readable error message.
    """
    # Error handling for empty inputs
    if not weblink and not pdf_file:
        return "Please provide a website link or upload a PDF."
    if not question:
        return "Please ask a question."

    docs = []
    try:
        if weblink:
            docs.extend(load_website(weblink))
        if pdf_file:
            # Accept both a Gradio tempfile object (.name) and a filepath
            # string (newer Gradio returns str for gr.File by default).
            pdf_path = getattr(pdf_file, "name", pdf_file)
            docs.extend(load_pdf(pdf_path))

        # Guard against sources that produced no content: FAISS cannot
        # build an index from an empty document list.
        if not docs:
            return "No readable content was found in the provided sources."

        vector_store = setup_vector_store(docs)
        return ask_question(question, vector_store)
    except Exception as e:  # UI boundary: surface the failure instead of crashing the app
        return f"An error occurred: {str(e)}"
68
+
69
+ # --- INTERFACE ---
70
# Wire the handler into a simple three-input, one-output Gradio UI.
_inputs = [
    gr.Textbox(label="Website Link (Optional)"),
    gr.File(label="Upload PDF (Optional)"),
    gr.Textbox(label="Ask a Question"),
]

demo = gr.Interface(
    fn=process_input,
    inputs=_inputs,
    outputs=gr.Textbox(label="Final Answer"),
    title="Web & PDF QA System",
    description="Upload a PDF or enter a website URL to chat with the content.",
)
81
+
82
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()
84
+