dcata004 committed on
Commit
1a622af
Β·
verified Β·
1 Parent(s): 280ebde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -26
app.py CHANGED
@@ -1,52 +1,54 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
 
6
  from langchain_community.vectorstores import Chroma
 
7
  from langchain.chains import RetrievalQA
8
  from datasets import Dataset
9
  from ragas import evaluate
10
  from ragas.metrics import faithfulness, answer_relevancy
11
 
12
- # --- 1. KEY LOADER & DIAGNOSTICS ---
13
- # Try to load the key from Hugging Face Secrets
14
  api_key = os.getenv("OPENAI_API_KEY")
15
 
16
- # Diagnostic: Determine status without revealing the key
17
  if api_key:
18
  key_status = "βœ… ACTIVE (Loaded from Secrets)"
19
- # FORCE the environment variable for Ragas (which relies on os.environ)
20
  os.environ["OPENAI_API_KEY"] = api_key
21
  else:
22
  key_status = "❌ MISSING (Check Settings -> Secrets)"
23
 
24
  def audit_rag(pdf_file, user_question):
25
- """
26
- 1. Reads PDF
27
- 2. Answers Question (using your Key)
28
- 3. Audits the Answer (using your Key)
29
- """
30
  if not api_key:
31
- return "ERROR: API Key is missing. Please add OPENAI_API_KEY in Settings -> Secrets.", "ERROR", "0", "0"
32
 
33
  if not pdf_file or not user_question:
34
- return "Please upload a PDF and ask a question.", "Waiting for input...", "0.00", "0.00"
35
 
36
  try:
37
- # 1. LOAD & PROCESS DOCUMENT
38
  loader = PyPDFLoader(pdf_file.name)
39
  documents = loader.load()
40
-
41
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
42
  texts = text_splitter.split_documents(documents)
43
 
44
- # 2. CREATE RAG ENGINE (Explicitly passing API Key)
45
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
46
  db = Chroma.from_documents(texts, embeddings)
47
  retriever = db.as_retriever(search_kwargs={"k": 3})
48
 
49
- # Explicitly passing API Key to the LLM
50
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
51
 
52
  qa_chain = RetrievalQA.from_chain_type(
@@ -56,13 +58,12 @@ def audit_rag(pdf_file, user_question):
56
  return_source_documents=True
57
  )
58
 
59
- # 3. GENERATE ANSWER
60
  result = qa_chain.invoke({"query": user_question})
61
  generated_answer = result['result']
62
  source_docs = [doc.page_content for doc in result['source_documents']]
63
 
64
- # 4. RUN THE AUDIT (RAGAS)
65
- # Ragas requires the 'llm' and 'embeddings' to be passed explicitly to avoid config errors
66
  data = {
67
  'question': [user_question],
68
  'answer': [generated_answer],
@@ -71,19 +72,17 @@ def audit_rag(pdf_file, user_question):
71
  }
72
  dataset = Dataset.from_dict(data)
73
 
74
- # Evaluate using the explicitly configured LLM/Embeddings
75
  score = evaluate(
76
  dataset=dataset,
77
  metrics=[faithfulness, answer_relevancy],
78
- llm=llm, # Force Ragas to use our authenticated LLM
79
- embeddings=embeddings # Force Ragas to use our authenticated Embeddings
80
  )
81
 
82
  audit_results = score.to_pandas()
83
  faith_score = audit_results.iloc[0]['faithfulness']
84
  relevancy_score = audit_results.iloc[0]['answer_relevancy']
85
 
86
- # 5. GENERATE VERDICT
87
  verdict = "βœ… PASS" if faith_score > 0.8 else "❌ FAIL (Hallucination Detected)"
88
 
89
  return generated_answer, verdict, f"{faith_score:.2f}", f"{relevancy_score:.2f}"
@@ -91,11 +90,11 @@ def audit_rag(pdf_file, user_question):
91
  except Exception as e:
92
  return f"System Error: {str(e)}", "ERROR", "0", "0"
93
 
94
- # --- USER INTERFACE ---
95
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
96
  gr.Markdown("# βš–οΈ Veritas: AI Hallucination Auditor")
97
- gr.Markdown(f"**System Status:** {key_status}") # Display key status clearly
98
- gr.Markdown("Upload a document (e.g., Financial Report) and ask a question. This tool will answer AND verify if the AI stuck to the facts.")
99
 
100
  with gr.Row():
101
  with gr.Column():
 
import os
import sys

# --- 1. CHROMA DB FIX FOR HUGGING FACE ---
# ChromaDB needs a newer sqlite3 than the one preinstalled on the Linux image,
# so alias the pysqlite3 binary wheel in as the stdlib `sqlite3` module.
# This runs before anything pulls in chromadb (via langchain's Chroma below).
try:
    __import__('pysqlite3')
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    # pysqlite3 not installed (e.g. running locally) -- keep the stdlib sqlite3.
    pass

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# STABLE IMPORT (matches langchain==0.1.20)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
# STABLE IMPORT
from langchain.chains import RetrievalQA
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
23
 
24
# --- 2. KEY LOADER ---
# Read the OpenAI key from the environment (Hugging Face Spaces secrets land here).
api_key = os.getenv("OPENAI_API_KEY")

if api_key:
    # Re-export so libraries that read os.environ directly (e.g. Ragas) see it too.
    os.environ["OPENAI_API_KEY"] = api_key
    key_status = "βœ… ACTIVE (Loaded from Secrets)"
else:
    key_status = "❌ MISSING (Check Settings -> Secrets)"
32
 
33
  def audit_rag(pdf_file, user_question):
 
 
 
 
 
34
  if not api_key:
35
+ return "ERROR: API Key is missing.", "ERROR", "0", "0"
36
 
37
  if not pdf_file or not user_question:
38
+ return "Please upload a PDF and ask a question.", "Waiting...", "0.00", "0.00"
39
 
40
  try:
41
+ # Load & Split
42
  loader = PyPDFLoader(pdf_file.name)
43
  documents = loader.load()
 
44
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
45
  texts = text_splitter.split_documents(documents)
46
 
47
+ # RAG Engine
48
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
49
  db = Chroma.from_documents(texts, embeddings)
50
  retriever = db.as_retriever(search_kwargs={"k": 3})
51
 
 
52
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
53
 
54
  qa_chain = RetrievalQA.from_chain_type(
 
58
  return_source_documents=True
59
  )
60
 
61
+ # Answer
62
  result = qa_chain.invoke({"query": user_question})
63
  generated_answer = result['result']
64
  source_docs = [doc.page_content for doc in result['source_documents']]
65
 
66
+ # Ragas Audit
 
67
  data = {
68
  'question': [user_question],
69
  'answer': [generated_answer],
 
72
  }
73
  dataset = Dataset.from_dict(data)
74
 
 
75
  score = evaluate(
76
  dataset=dataset,
77
  metrics=[faithfulness, answer_relevancy],
78
+ llm=llm,
79
+ embeddings=embeddings
80
  )
81
 
82
  audit_results = score.to_pandas()
83
  faith_score = audit_results.iloc[0]['faithfulness']
84
  relevancy_score = audit_results.iloc[0]['answer_relevancy']
85
 
 
86
  verdict = "βœ… PASS" if faith_score > 0.8 else "❌ FAIL (Hallucination Detected)"
87
 
88
  return generated_answer, verdict, f"{faith_score:.2f}", f"{relevancy_score:.2f}"
 
90
  except Exception as e:
91
  return f"System Error: {str(e)}", "ERROR", "0", "0"
92
 
93
+ # UI
94
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
95
  gr.Markdown("# βš–οΈ Veritas: AI Hallucination Auditor")
96
+ gr.Markdown(f"**System Status:** {key_status}")
97
+ gr.Markdown("Upload a document (e.g., Financial Report) and ask a question.")
98
 
99
  with gr.Row():
100
  with gr.Column():