Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from bs4 import BeautifulSoup
|
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
from langchain_community.vectorstores import FAISS
|
| 10 |
from langchain.embeddings.base import Embeddings
|
| 11 |
-
|
| 12 |
|
| 13 |
# === Embeddings Wrapper ===
|
| 14 |
class SentenceTransformerEmbeddings(Embeddings):
|
|
@@ -35,27 +35,28 @@ def split_text(text, chunk_size=500, overlap=50):
|
|
| 35 |
start += chunk_size - overlap
|
| 36 |
return chunks
|
| 37 |
|
| 38 |
-
def
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
def create_vectorstore(chunks):
|
| 52 |
embeddings = SentenceTransformerEmbeddings()
|
| 53 |
return FAISS.from_texts(chunks, embedding=embeddings)
|
| 54 |
|
| 55 |
-
def generate_answer(vectorstore, question,
|
| 56 |
docs = vectorstore.similarity_search(question, k=3)
|
| 57 |
context = "\n".join([doc.page_content for doc in docs])
|
| 58 |
-
return
|
| 59 |
|
| 60 |
def extract_website_text(url):
|
| 61 |
try:
|
|
@@ -75,7 +76,7 @@ st.title("π RAG Assistant: Chat with PDF, CSV, or Website")
|
|
| 75 |
# Sidebar
|
| 76 |
with st.sidebar:
|
| 77 |
data_source = st.selectbox("π Select Input Type", ["PDF", "CSV", "Website URL"])
|
| 78 |
-
|
| 79 |
|
| 80 |
# === Logic by Data Source ===
|
| 81 |
vectorstore = None
|
|
@@ -120,12 +121,12 @@ elif data_source == "Website URL":
|
|
| 120 |
st.success("β
Website text extracted and indexed!")
|
| 121 |
|
| 122 |
# === QA Section ===
|
| 123 |
-
if vectorstore and
|
| 124 |
st.subheader("β Ask a Question")
|
| 125 |
question = st.text_input("π¬ Your question")
|
| 126 |
if question:
|
| 127 |
with st.spinner("π Thinking..."):
|
| 128 |
-
answer, top_docs = generate_answer(vectorstore, question,
|
| 129 |
st.success("π§ Answer")
|
| 130 |
st.write(answer)
|
| 131 |
|
|
@@ -135,6 +136,5 @@ if vectorstore and gemini_api_key:
|
|
| 135 |
|
| 136 |
st.download_button("π€ Download Answer", answer, file_name="rag_answer.txt")
|
| 137 |
|
| 138 |
-
elif not
|
| 139 |
-
st.info("π Please enter your
|
| 140 |
-
|
|
|
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
from langchain_community.vectorstores import FAISS
|
| 10 |
from langchain.embeddings.base import Embeddings
|
| 11 |
+
from transformers import pipeline
|
| 12 |
|
| 13 |
# === Embeddings Wrapper ===
|
| 14 |
class SentenceTransformerEmbeddings(Embeddings):
|
|
|
|
| 35 |
start += chunk_size - overlap
|
| 36 |
return chunks
|
| 37 |
|
| 38 |
+
def ask_mistral(question, context, hf_api_key):
|
| 39 |
+
# Load the Hugging Face Mistral model pipeline
|
| 40 |
+
nlp = pipeline("question-answering", model="mistral-7b", tokenizer="mistral-7b", use_auth_token=hf_api_key)
|
| 41 |
+
|
| 42 |
+
# Format the input
|
| 43 |
+
inputs = {
|
| 44 |
+
'context': context,
|
| 45 |
+
'question': question
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
# Generate the answer using Mistral
|
| 49 |
+
answer = nlp(inputs)
|
| 50 |
+
return answer['answer']
|
| 51 |
|
| 52 |
def create_vectorstore(chunks):
|
| 53 |
embeddings = SentenceTransformerEmbeddings()
|
| 54 |
return FAISS.from_texts(chunks, embedding=embeddings)
|
| 55 |
|
| 56 |
+
def generate_answer(vectorstore, question, hf_api_key):
|
| 57 |
docs = vectorstore.similarity_search(question, k=3)
|
| 58 |
context = "\n".join([doc.page_content for doc in docs])
|
| 59 |
+
return ask_mistral(question, context, hf_api_key), docs
|
| 60 |
|
| 61 |
def extract_website_text(url):
|
| 62 |
try:
|
|
|
|
| 76 |
# Sidebar
|
| 77 |
with st.sidebar:
|
| 78 |
data_source = st.selectbox("π Select Input Type", ["PDF", "CSV", "Website URL"])
|
| 79 |
+
hf_api_key = st.text_input("π Enter Hugging Face API Key", type="password")
|
| 80 |
|
| 81 |
# === Logic by Data Source ===
|
| 82 |
vectorstore = None
|
|
|
|
| 121 |
st.success("β
Website text extracted and indexed!")
|
| 122 |
|
| 123 |
# === QA Section ===
|
| 124 |
+
if vectorstore and hf_api_key:
|
| 125 |
st.subheader("β Ask a Question")
|
| 126 |
question = st.text_input("π¬ Your question")
|
| 127 |
if question:
|
| 128 |
with st.spinner("π Thinking..."):
|
| 129 |
+
answer, top_docs = generate_answer(vectorstore, question, hf_api_key)
|
| 130 |
st.success("π§ Answer")
|
| 131 |
st.write(answer)
|
| 132 |
|
|
|
|
| 136 |
|
| 137 |
st.download_button("π€ Download Answer", answer, file_name="rag_answer.txt")
|
| 138 |
|
| 139 |
+
elif not hf_api_key:
|
| 140 |
+
st.info("π Please enter your Hugging Face API key in the sidebar.")
|
|
|