Update src/streamlit_app.py

src/streamlit_app.py  CHANGED  +70 -24
@@ -2,44 +2,80 @@ import streamlit as st
 from sentence_transformers import SentenceTransformer
 import torch
 import faiss
-import os
 import PyPDF2
 from groq import Groq
+import os
+
+# Check if running in a Hugging Face Space
+HF_SPACE = "HF_SPACE_ID" in os.environ  # Corrected check. The env var is HF_SPACE_ID, not SPACE_ID

 # Load embedding model
-
+try:
+    model = SentenceTransformer("thenlper/gte-small")
+except Exception as e:
+    st.error(f"Error loading the Sentence Transformer model: {e}. Please ensure the correct version of sentence-transformers is in requirements.txt.")
+    # Stop if the model fails to load. Crucial for HuggingFace
+    st.stop()

 # Initialize Groq client
-
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+if not GROQ_API_KEY:
+    st.error("GROQ_API_KEY environment variable not set. The app will not be able to query Groq.")
+    # Don't stop here, allow basic functionality. Groq features will be unavailable, but the rest can work.
+    groq_client = None
+else:
+    groq_client = Groq(api_key=GROQ_API_KEY)
+

 def embed_chunks(chunks):
-
+    try:
+        return model.encode(chunks, convert_to_numpy=True)
+    except Exception as e:
+        st.error(f"Error embedding chunks: {e}")
+        return None  # Important: Handle the error, return None

 def chunk_text(text, chunk_size=500, overlap=100):
     chunks = []
     for i in range(0, len(text), chunk_size - overlap):
-        chunks.append(text[i:i+chunk_size])
+        chunks.append(text[i:i + chunk_size])
     return chunks

+
 def create_faiss_index(embeddings):
-
-
-
-
+    try:
+        dim = embeddings.shape[1]
+        index = faiss.IndexFlatL2(dim)
+        index.add(embeddings)
+        return index
+    except Exception as e:
+        st.error(f"Error creating FAISS index: {e}")
+        return None  # Important: Handle error

 def search_index(query, index, chunks, top_k=5):
-
-
-
+    try:
+        query_embedding = embed_chunks([query])
+        if query_embedding is None or index is None:  # handle errors from embed_chunks or create_faiss_index
+            return []
+        distances, indices = index.search(query_embedding, top_k)
+        return [chunks[i] for i in indices[0]]
+    except Exception as e:
+        st.error(f"Error searching FAISS index: {e}")
+        return []

 def extract_text_from_pdf(file):
-
-
-
-
-
+    try:
+        reader = PyPDF2.PdfReader(file)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text() or ""
+        return text
+    except Exception as e:
+        st.error(f"Error extracting text from PDF: {e}")
+        return ""

 def ask_groq(query, context):
+    if groq_client is None:
+        return "Groq API key is not configured. This feature is unavailable."
     try:
         completion = groq_client.chat.completions.create(
             messages=[
@@ -61,10 +97,10 @@ def ask_groq(query, context):
     except Exception as e:
         return f"Error from Groq API: {e}"

+
 # Streamlit app
 st.set_page_config(page_title="Lexicon: Policy Explainer Bot", layout="wide")
 st.title("π Lexicon: Understand Policies with Confidence")
-
 st.markdown("Upload a PDF or paste policy text below. Lexicon will highlight key points and flag potential risks.")

 uploaded_file = st.file_uploader("Upload Policy/T&C PDF", type=["pdf"])
@@ -76,19 +112,29 @@ if uploaded_file or clipboard_text.strip():
     else:
         text = clipboard_text.strip()

+    if not text:  # Handle the case where extraction/clipboard yields empty text
+        st.error("No text was extracted from the PDF or provided in the text area. Please check your input.")
+        st.stop()
+
     st.success("Document loaded. Processing...")
     chunks = chunk_text(text)
-    embeddings = embed_chunks(chunks)
-
+    embeddings = embed_chunks(chunks)  # embeddings can be None if error
+    if embeddings is not None:
+        index = create_faiss_index(embeddings)  # index can be None if error
+    else:
+        index = None

     with st.expander("π Ask a question about this policy"):
         query = st.text_input("Enter your question")
         if query:
             relevant_chunks = search_index(query, index, chunks)
-
-
-
-
+            if relevant_chunks:  # only call groq if relevant chunks were found.
+                context = "\n\n".join(relevant_chunks)
+                answer = ask_groq(query=query, context=context)
+                st.markdown("**Answer:**")
+                st.info(answer)
+            else:
+                st.info("No relevant information found in the document to answer your question.")

     st.markdown("β **Ready for follow-up questions.** Ask anything about clauses, risks, or key terms.")
 else:
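
The model-loading error path added above points the operator at requirements.txt. For reference, a minimal dependency list implied by this file's imports is sketched below; the package names follow the imports in the diff, while the choice of faiss-cpu over a GPU build and the absence of version pins are assumptions, not something this commit specifies.

streamlit
sentence-transformers
torch
faiss-cpu
PyPDF2
groq

The retrieval flow the new helpers implement (encode chunks, build a flat L2 index, search it with the encoded query) can also be exercised outside Streamlit. The sketch below is illustrative only: the model name and the index/search calls mirror the diff, but the sample chunks and query are invented.

import faiss
from sentence_transformers import SentenceTransformer

# Same model the app loads; encode() returns a float32 NumPy array, which FAISS expects.
model = SentenceTransformer("thenlper/gte-small")
chunks = ["Refunds are issued within 30 days of cancellation.",
          "Personal data may be shared with third-party processors."]
embeddings = model.encode(chunks, convert_to_numpy=True)

# Exact (flat) L2 index over the chunk embeddings, as in create_faiss_index().
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Encode the query the same way and take the closest chunks, as in search_index().
query_embedding = model.encode(["Can I get my money back?"], convert_to_numpy=True)
distances, indices = index.search(query_embedding, 2)
print([chunks[i] for i in indices[0]])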