# Lexicon - Policy Explainer Bot (Hugging Face Spaces app)
# NOTE(review): the hosted Space was showing "Runtime error" — see import note below.
import os

import faiss
import fitz  # PyMuPDF
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
# NOTE(review): `langchain.llms` has no `Groq` class — this import is the likely
# cause of the Space's runtime error. The supported integration is
# `from langchain_groq import ChatGroq`; confirm against installed dependencies.
from langchain.llms import Groq
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

# Load GROQ_API_KEY (and any other secrets) from a local .env file.
load_dotenv()
# CONFIG
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # supplied via .env / Space secrets
GROQ_MODEL_NAME = "mixtral-8x7b-32768"

# Page chrome and input widgets.
st.set_page_config(page_title="Lexicon - Policy Explainer", layout="centered")
st.markdown(
    "<h1 style='text-align: center;'>📜 Lexicon: Policy Explainer Bot</h1>",
    unsafe_allow_html=True,
)
st.markdown("This app explains, summarizes, and highlights risks in large policy or T&C documents.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
clipboard_input = st.text_area("Or paste policy text here", height=200)
if uploaded_file or clipboard_input:
    with st.spinner("Processing document..."):

        def extract_text_from_pdf(file):
            """Return the concatenated text of every page of an uploaded PDF."""
            doc = fitz.open(stream=file.read(), filetype="pdf")
            try:
                return " ".join(page.get_text() for page in doc)
            finally:
                doc.close()  # release PyMuPDF resources even if extraction fails

        # Step 1: extract raw text (uploaded PDF wins over pasted text).
        raw_text = extract_text_from_pdf(uploaded_file) if uploaded_file else clipboard_input

        # Step 2: naive sentence-level chunking; drop empty fragments so the
        # vector store never receives blank documents.
        sentences = [s for s in raw_text.split(". ") if s.strip()]
        documents = [Document(page_content=s) for s in sentences]

        if not documents:
            st.warning("No text could be extracted from the input.")
            st.stop()

        # Step 3: build the vector store. FAISS.from_documents embeds the
        # documents itself, so the original's manual SentenceTransformer
        # encoding + faiss.IndexFlatL2 index (which was built and then
        # immediately discarded) is removed as dead code.
        embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
        retriever = FAISS.from_documents(documents, embeddings).as_retriever()

        # Step 4: retrieval-augmented QA chain.
        # NOTE(review): `Groq` is not a langchain LLM class — presumably this
        # should be `ChatGroq` from the `langchain_groq` package; confirm.
        llm = Groq(api_key=GROQ_API_KEY, model=GROQ_MODEL_NAME)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    st.success("Document processed! You can now ask questions.")

    # Free-form Q&A over the indexed document.
    query = st.text_input("Ask a question about the document:")
    if query:
        with st.spinner("Generating answer..."):
            result = qa_chain.run(query)
        st.markdown("### 🧠 Answer:")
        st.markdown(result)

    # One-click risk summary using a fixed prompt against the same chain.
    if st.button("Suggest key risks"):
        risk_prompt = "List any risks or obligations a user should be aware of from this document."
        with st.spinner("Identifying risks..."):
            risk_result = qa_chain.run(risk_prompt)
        st.markdown("### ⚠️ Risks & Concerns:")
        st.markdown(risk_result)