dcata004 committed on
Commit
1a622af
Β·
verified Β·
1 Parent(s): 280ebde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -26
app.py CHANGED
@@ -1,52 +1,54 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
 
6
  from langchain_community.vectorstores import Chroma
 
7
  from langchain.chains import RetrievalQA
8
  from datasets import Dataset
9
  from ragas import evaluate
10
  from ragas.metrics import faithfulness, answer_relevancy
11
 
12
- # --- 1. KEY LOADER & DIAGNOSTICS ---
13
- # Try to load the key from Hugging Face Secrets
14
  api_key = os.getenv("OPENAI_API_KEY")
15
 
16
- # Diagnostic: Determine status without revealing the key
17
  if api_key:
18
  key_status = "βœ… ACTIVE (Loaded from Secrets)"
19
- # FORCE the environment variable for Ragas (which relies on os.environ)
20
  os.environ["OPENAI_API_KEY"] = api_key
21
  else:
22
  key_status = "❌ MISSING (Check Settings -> Secrets)"
23
 
24
  def audit_rag(pdf_file, user_question):
25
- """
26
- 1. Reads PDF
27
- 2. Answers Question (using your Key)
28
- 3. Audits the Answer (using your Key)
29
- """
30
  if not api_key:
31
- return "ERROR: API Key is missing. Please add OPENAI_API_KEY in Settings -> Secrets.", "ERROR", "0", "0"
32
 
33
  if not pdf_file or not user_question:
34
- return "Please upload a PDF and ask a question.", "Waiting for input...", "0.00", "0.00"
35
 
36
  try:
37
- # 1. LOAD & PROCESS DOCUMENT
38
  loader = PyPDFLoader(pdf_file.name)
39
  documents = loader.load()
40
-
41
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
42
  texts = text_splitter.split_documents(documents)
43
 
44
- # 2. CREATE RAG ENGINE (Explicitly passing API Key)
45
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
46
  db = Chroma.from_documents(texts, embeddings)
47
  retriever = db.as_retriever(search_kwargs={"k": 3})
48
 
49
- # Explicitly passing API Key to the LLM
50
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
51
 
52
  qa_chain = RetrievalQA.from_chain_type(
@@ -56,13 +58,12 @@ def audit_rag(pdf_file, user_question):
56
  return_source_documents=True
57
  )
58
 
59
- # 3. GENERATE ANSWER
60
  result = qa_chain.invoke({"query": user_question})
61
  generated_answer = result['result']
62
  source_docs = [doc.page_content for doc in result['source_documents']]
63
 
64
- # 4. RUN THE AUDIT (RAGAS)
65
- # Ragas requires the 'llm' and 'embeddings' to be passed explicitly to avoid config errors
66
  data = {
67
  'question': [user_question],
68
  'answer': [generated_answer],
@@ -71,19 +72,17 @@ def audit_rag(pdf_file, user_question):
71
  }
72
  dataset = Dataset.from_dict(data)
73
 
74
- # Evaluate using the explicitly configured LLM/Embeddings
75
  score = evaluate(
76
  dataset=dataset,
77
  metrics=[faithfulness, answer_relevancy],
78
- llm=llm, # Force Ragas to use our authenticated LLM
79
- embeddings=embeddings # Force Ragas to use our authenticated Embeddings
80
  )
81
 
82
  audit_results = score.to_pandas()
83
  faith_score = audit_results.iloc[0]['faithfulness']
84
  relevancy_score = audit_results.iloc[0]['answer_relevancy']
85
 
86
- # 5. GENERATE VERDICT
87
  verdict = "βœ… PASS" if faith_score > 0.8 else "❌ FAIL (Hallucination Detected)"
88
 
89
  return generated_answer, verdict, f"{faith_score:.2f}", f"{relevancy_score:.2f}"
@@ -91,11 +90,11 @@ def audit_rag(pdf_file, user_question):
91
  except Exception as e:
92
  return f"System Error: {str(e)}", "ERROR", "0", "0"
93
 
94
- # --- USER INTERFACE ---
95
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
96
  gr.Markdown("# βš–οΈ Veritas: AI Hallucination Auditor")
97
- gr.Markdown(f"**System Status:** {key_status}") # Display key status clearly
98
- gr.Markdown("Upload a document (e.g., Financial Report) and ask a question. This tool will answer AND verify if the AI stuck to the facts.")
99
 
100
  with gr.Row():
101
  with gr.Column():
 
import os
import sys

# --- 1. CHROMA DB FIX FOR HUGGING FACE ---
# ChromaDB needs a newer sqlite3 than the one preinstalled on the Linux image,
# so alias the pysqlite3 binary wheel in as the stdlib `sqlite3` module.
# This runs before anything pulls in chromadb (via langchain's Chroma below).
try:
    __import__('pysqlite3')
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    # pysqlite3 not installed (e.g. running locally) -- keep the stdlib sqlite3.
    pass

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# STABLE IMPORT (matches langchain==0.1.20)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
# STABLE IMPORT
from langchain.chains import RetrievalQA
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
23
 
24
# --- 2. KEY LOADER ---
# Read the OpenAI key from the environment (Hugging Face Spaces secrets land here).
api_key = os.getenv("OPENAI_API_KEY")

if api_key:
    # Re-export so libraries that read os.environ directly (e.g. Ragas) see it too.
    os.environ["OPENAI_API_KEY"] = api_key
    key_status = "βœ… ACTIVE (Loaded from Secrets)"
else:
    key_status = "❌ MISSING (Check Settings -> Secrets)"
32
 
33
  def audit_rag(pdf_file, user_question):
 
 
 
 
 
34
  if not api_key:
35
+ return "ERROR: API Key is missing.", "ERROR", "0", "0"
36
 
37
  if not pdf_file or not user_question:
38
+ return "Please upload a PDF and ask a question.", "Waiting...", "0.00", "0.00"
39
 
40
  try:
41
+ # Load & Split
42
  loader = PyPDFLoader(pdf_file.name)
43
  documents = loader.load()
 
44
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
45
  texts = text_splitter.split_documents(documents)
46
 
47
+ # RAG Engine
48
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
49
  db = Chroma.from_documents(texts, embeddings)
50
  retriever = db.as_retriever(search_kwargs={"k": 3})
51
 
 
52
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
53
 
54
  qa_chain = RetrievalQA.from_chain_type(
 
58
  return_source_documents=True
59
  )
60
 
61
+ # Answer
62
  result = qa_chain.invoke({"query": user_question})
63
  generated_answer = result['result']
64
  source_docs = [doc.page_content for doc in result['source_documents']]
65
 
66
+ # Ragas Audit
 
67
  data = {
68
  'question': [user_question],
69
  'answer': [generated_answer],
 
72
  }
73
  dataset = Dataset.from_dict(data)
74
 
 
75
  score = evaluate(
76
  dataset=dataset,
77
  metrics=[faithfulness, answer_relevancy],
78
+ llm=llm,
79
+ embeddings=embeddings
80
  )
81
 
82
  audit_results = score.to_pandas()
83
  faith_score = audit_results.iloc[0]['faithfulness']
84
  relevancy_score = audit_results.iloc[0]['answer_relevancy']
85
 
 
86
  verdict = "βœ… PASS" if faith_score > 0.8 else "❌ FAIL (Hallucination Detected)"
87
 
88
  return generated_answer, verdict, f"{faith_score:.2f}", f"{relevancy_score:.2f}"
 
90
  except Exception as e:
91
  return f"System Error: {str(e)}", "ERROR", "0", "0"
92
 
93
+ # UI
94
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
95
  gr.Markdown("# βš–οΈ Veritas: AI Hallucination Auditor")
96
+ gr.Markdown(f"**System Status:** {key_status}")
97
+ gr.Markdown("Upload a document (e.g., Financial Report) and ask a question.")
98
 
99
  with gr.Row():
100
  with gr.Column():