# Lexicon_Chatbot / src/streamlit_app.py
# Author: Harishkhawaja — commit cf8eb7f (verified)
# (HuggingFace Spaces file-viewer chrome converted to a comment header so the
#  module is valid Python.)
import streamlit as st
import fitz # PyMuPDF
import os
from sentence_transformers import SentenceTransformer
import faiss
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms import Groq
from dotenv import load_dotenv
load_dotenv()
# CONFIG
# Sentence-transformers model used to embed document chunks for retrieval.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
# Groq API key, read from the environment (populated by load_dotenv() above);
# None if the variable is unset — the Groq client will fail later in that case.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Groq-hosted LLM used to answer questions over the retrieved context.
GROQ_MODEL_NAME = "mixtral-8x7b-32768"
# Page chrome: title/layout must be set before any other Streamlit call.
st.set_page_config(page_title="Lexicon - Policy Explainer", layout="centered")
st.markdown("<h1 style='text-align: center;'>📜 Lexicon: Policy Explainer Bot</h1>", unsafe_allow_html=True)
st.markdown("This app explains, summarizes, and highlights risks in large policy or T&C documents.")
# Two input paths: an uploaded PDF takes precedence over pasted text (see the
# `if uploaded_file else clipboard_input` selection below).
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
clipboard_input = st.text_area("Or paste policy text here", height=200)
if uploaded_file or clipboard_input:
    with st.spinner("Processing document..."):

        # Step 1: Extract text. An uploaded PDF takes precedence over pasted text.
        def extract_text_from_pdf(file):
            """Return the concatenated text of every page of the uploaded PDF."""
            # Use the document as a context manager so the handle is closed
            # even if text extraction raises (the original leaked it).
            with fitz.open(stream=file.read(), filetype="pdf") as doc:
                return " ".join(page.get_text() for page in doc)

        raw_text = extract_text_from_pdf(uploaded_file) if uploaded_file else clipboard_input

        # Step 2: Split into sentence-sized chunks. Strip and drop empty
        # fragments so the embedder never receives a blank string (a trailing
        # ". " or empty input would otherwise produce one).
        sentences = [s.strip() for s in raw_text.split(". ") if s.strip()]

        # Step 3: Embed and index — once. The original embedded the corpus
        # with SentenceTransformer, built a raw faiss index plus a malformed
        # FAISS(...) retriever, then discarded all of it and re-embedded via
        # FAISS.from_documents; only this final path is kept.
        documents = [Document(page_content=s) for s in sentences]
        embedding_fn = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
        retriever = FAISS.from_documents(documents, embedding_fn).as_retriever()

        # Step 4: RAG chain over the Groq-hosted LLM.
        # NOTE(review): `langchain.llms.Groq` does not exist in mainline
        # langchain — confirm this import resolves in the deployed
        # environment (langchain-groq exposes ChatGroq instead).
        llm = Groq(api_key=GROQ_API_KEY, model=GROQ_MODEL_NAME)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    st.success("Document processed! You can now ask questions.")

    # Free-form Q&A over the indexed document.
    query = st.text_input("Ask a question about the document:")
    if query:
        with st.spinner("Generating answer..."):
            result = qa_chain.run(query)
        st.markdown("### 🧠 Answer:")
        st.markdown(result)

    # One-click risk summary: runs a canned prompt through the same chain.
    if st.button("Suggest key risks"):
        risk_prompt = "List any risks or obligations a user should be aware of from this document."
        with st.spinner("Identifying risks..."):
            risk_result = qa_chain.run(risk_prompt)
        st.markdown("### ⚠️ Risks & Concerns:")
        st.markdown(risk_result)