dcata004 committed on
Commit
7a50391
·
verified ·
1 Parent(s): 2dfb9db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -79
app.py CHANGED
@@ -2,112 +2,119 @@ import os
2
  import gradio as gr
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 
 
 
 
 
 
5
 
6
- # --- DEBUG & SETUP ---
 
7
  api_key = os.getenv("OPENAI_API_KEY")
8
 
9
- # Check if Key is missing and warn the logs
10
- if not api_key:
11
- print("⚠️ CRITICAL ERROR: OPENAI_API_KEY is missing from Secrets.")
12
- # Fallback: Try to grab it if user named it differently
13
- api_key = os.getenv("OPENAI_KEY") or os.getenv("key")
14
-
15
- # Force set the variable for LangChain
16
- os.environ["OPENAI_API_KEY"] = str(api_key)
17
- from langchain_community.document_loaders import PyPDFLoader
18
- from langchain_text_splitters import RecursiveCharacterTextSplitter
19
- from langchain_chroma import Chroma
20
 
21
- # UPDATED IMPORT: This is the stable path for RetrievalQA
22
- from langchain.chains import RetrievalQA
23
- from langchain.prompts import PromptTemplate
 
 
 
 
 
24
 
25
- # Global variables
26
- vectorstore = None
27
- qa_chain = None
28
 
29
- def process_pdf(file_path, api_key):
30
- global vectorstore, qa_chain
31
-
32
- if not api_key:
33
- return "⚠️ Error: Please enter your OpenAI API Key first."
34
-
35
- os.environ["OPENAI_API_KEY"] = api_key
36
-
37
  try:
38
- # 1. Load PDF
39
- loader = PyPDFLoader(file_path)
40
- docs = loader.load()
41
 
42
- # 2. Split Text
43
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
44
- splits = text_splitter.split_documents(docs)
45
 
46
- # 3. Embed & Store
47
- # We explicitly use the lighter embedding model to save cost/time
48
- embeddings = OpenAIEmbeddings()
49
- vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
50
 
51
- # 4. Create Chain
52
- llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
53
 
54
- audit_template = """You are Veritas, an AI Compliance Auditor.
55
- Use the following pieces of context to answer the question at the end.
56
-
57
- RULES:
58
- 1. If the answer is in the text, state it clearly.
59
- 2. If the answer is NOT in the text, you must explicitly state: "FAIL: Information not found in source document."
60
- 3. Do not hallucinate.
61
-
62
- Context: {context}
63
-
64
- Question: {question}
65
 
66
- Verdict:"""
 
 
 
67
 
68
- QA_CHAIN_PROMPT = PromptTemplate.from_template(audit_template)
 
 
 
 
 
 
 
 
69
 
70
- qa_chain = RetrievalQA.from_chain_type(
71
- llm=llm,
72
- chain_type="stuff",
73
- retriever=vectorstore.as_retriever(),
74
- chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
 
75
  )
76
 
77
- return "✅ Document Processed. Veritas is ready."
 
 
 
 
 
78
 
79
- except Exception as e:
80
- return f"❌ Error: {str(e)}"
81
 
82
- def audit_query(query):
83
- global qa_chain
84
- if not qa_chain:
85
- return "⚠️ Please upload a document first."
86
-
87
- try:
88
- response = qa_chain.invoke(query)
89
- return response['result']
90
  except Exception as e:
91
- return f"Error: {str(e)}"
92
 
93
- # --- INTERFACE ---
94
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
95
- gr.Markdown("# 🛡️ Veritas: AI Compliance Auditor")
 
 
96
 
97
  with gr.Row():
98
  with gr.Column():
99
- api_input = gr.Textbox(label="OpenAI API Key", type="password")
100
- file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
101
- upload_btn = gr.Button("Initialize", variant="primary")
102
- status = gr.Textbox(label="Status", interactive=False)
103
-
104
  with gr.Column():
105
- query_input = gr.Textbox(label="Audit Query")
106
- audit_btn = gr.Button("Run Audit")
107
- output = gr.Textbox(label="Verdict", lines=10)
 
 
108
 
109
- upload_btn.click(process_pdf, inputs=[file_input, api_input], outputs=status)
110
- audit_btn.click(audit_query, inputs=query_input, outputs=output)
 
 
 
111
 
112
  if __name__ == "__main__":
113
  demo.launch()
 
2
  import gradio as gr
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain.chains import RetrievalQA
8
+ from datasets import Dataset
9
+ from ragas import evaluate
10
+ from ragas.metrics import faithfulness, answer_relevancy
11
 
12
+ # --- 1. KEY LOADER & DIAGNOSTICS ---
13
+ # Try to load the key from Hugging Face Secrets
14
  api_key = os.getenv("OPENAI_API_KEY")
15
 
16
+ # Diagnostic: Determine status without revealing the key
17
+ if api_key:
18
+ key_status = "✅ ACTIVE (Loaded from Secrets)"
19
+ # FORCE the environment variable for Ragas (which relies on os.environ)
20
+ os.environ["OPENAI_API_KEY"] = api_key
21
+ else:
22
+ key_status = "❌ MISSING (Check Settings -> Secrets)"
 
 
 
 
23
 
24
def audit_rag(pdf_file, user_question):
    """Answer *user_question* from *pdf_file* via RAG, then audit the answer.

    Pipeline:
      1. Load and chunk the uploaded PDF.
      2. Build an in-memory Chroma index and a RetrievalQA chain.
      3. Generate an answer from the retrieved context.
      4. Score the answer with Ragas (faithfulness + answer relevancy).
      5. Emit a PASS/FAIL verdict based on faithfulness.

    Returns a 4-tuple of strings for the Gradio outputs:
    (answer, verdict, faithfulness score, relevancy score).
    All failures are reported in the answer slot; the function never raises.
    """
    if not api_key:
        return "ERROR: API Key is missing. Please add OPENAI_API_KEY in Settings -> Secrets.", "ERROR", "0.00", "0.00"

    if not pdf_file or not user_question:
        return "Please upload a PDF and ask a question.", "Waiting for input...", "0.00", "0.00"

    try:
        # 1. LOAD & PROCESS DOCUMENT
        # Depending on Gradio version/configuration, gr.File hands the
        # handler either a file-like object (with .name) or a plain
        # filepath string — accept both.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        texts = text_splitter.split_documents(documents)

        # 2. CREATE RAG ENGINE (explicitly passing the API key so these
        # clients do not depend on ambient environment configuration)
        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 3})

        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True  # needed to feed 'contexts' to Ragas
        )

        # 3. GENERATE ANSWER
        result = qa_chain.invoke({"query": user_question})
        generated_answer = result['result']
        source_docs = [doc.page_content for doc in result['source_documents']]

        # 4. RUN THE AUDIT (RAGAS)
        # 'ground_truth' is unused by the chosen metrics, but the column
        # must exist for some Ragas versions to accept the dataset.
        data = {
            'question': [user_question],
            'answer': [generated_answer],
            'contexts': [source_docs],
            'ground_truth': [""]
        }
        dataset = Dataset.from_dict(data)

        # Pass llm/embeddings explicitly so Ragas uses our authenticated
        # clients instead of constructing its own from global config.
        score = evaluate(
            dataset=dataset,
            metrics=[faithfulness, answer_relevancy],
            llm=llm,
            embeddings=embeddings
        )

        audit_results = score.to_pandas()
        faith_score = audit_results.iloc[0]['faithfulness']
        relevancy_score = audit_results.iloc[0]['answer_relevancy']

        # 5. GENERATE VERDICT
        # Ragas returns NaN when a metric cannot be computed; NaN > 0.8 is
        # False, so an inconclusive audit deliberately falls through to FAIL.
        verdict = "✅ PASS" if faith_score > 0.8 else "❌ FAIL (Hallucination Detected)"

        return generated_answer, verdict, f"{faith_score:.2f}", f"{relevancy_score:.2f}"

    except Exception as e:
        # UI boundary handler: surface the error in the answer textbox
        # instead of crashing the Gradio worker. Score slots use "0.00"
        # to stay consistent with the other non-success paths.
        return f"System Error: {str(e)}", "ERROR", "0.00", "0.00"
93
 
94
# --- USER INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header plus a live key diagnostic so users can tell at a glance
    # whether the Space is configured correctly.
    gr.Markdown("# ⚖️ Veritas: AI Hallucination Auditor")
    gr.Markdown(f"**System Status:** {key_status}")  # Display key status clearly
    gr.Markdown("Upload a document (e.g., Financial Report) and ask a question. This tool will answer AND verify if the AI stuck to the facts.")

    with gr.Row():
        # Left column: evidence upload and question entry.
        with gr.Column():
            pdf_evidence = gr.File(label="Upload PDF Evidence", file_types=[".pdf"])
            question_box = gr.Textbox(label="Cross-Examination Question", placeholder="e.g., What was the net profit in Q3?")
            run_audit_btn = gr.Button("Run Audit", variant="primary")

        # Right column: the generated answer plus audit scores.
        with gr.Column():
            answer_box = gr.Textbox(label="AI Witness Testimony (Answer)")
            with gr.Row():
                verdict_box = gr.Textbox(label="Verdict")
                faithfulness_box = gr.Textbox(label="Faithfulness Score (0-1)")
                relevancy_box = gr.Textbox(label="Relevancy Score")

    # Wire the button to the RAG + audit pipeline; the four outputs map
    # 1:1 onto audit_rag's 4-tuple return value.
    run_audit_btn.click(
        fn=audit_rag,
        inputs=[pdf_evidence, question_box],
        outputs=[answer_box, verdict_box, faithfulness_box, relevancy_box],
    )

if __name__ == "__main__":
    demo.launch()