dcata004 committed on
Commit
6fd370d
·
verified ·
1 Parent(s): b38dcd4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
# --- HUGGING FACE CHROMADB FIX ---
# Critical for deployment on HF Spaces to prevent SQLite errors: when the
# optional pysqlite3 package is installed, register it in sys.modules under
# the name "sqlite3" so later imports of sqlite3 receive it instead.
# This has to run before anything else imports sqlite3.
try:
    __import__('pysqlite3')
except ImportError:
    # pysqlite3 is not installed; keep the standard-library sqlite3.
    pass
else:
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
# ---------------------------------
12
+
13
+ import gradio as gr
14
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
15
+ from langchain_community.document_loaders import PyPDFLoader
16
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
17
+ from langchain_chroma import Chroma
18
+ from langchain.chains import RetrievalQA
19
+ from langchain.prompts import PromptTemplate
20
+
21
# Module-level state shared by the UI callbacks (sufficient for demo purposes).
# Both names stay None until process_pdf() has indexed a document.
vectorstore = qa_chain = None
24
+
25
# Prompt that restricts the model to the retrieved context and demands an
# explicit FAIL verdict when the context does not contain the answer.
_AUDIT_TEMPLATE = (
    "You are Veritas, an AI Compliance Auditor.\n"
    "Use the following pieces of context to answer the question at the end.\n"
    "\n"
    "RULES:\n"
    "1. If the answer is in the text, state it clearly and reference the context.\n"
    "2. If the answer is NOT in the text, you must explicitly state: "
    '"FAIL: Information not found in source document."\n'
    "3. Do not hallucinate or guess.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Verdict:"
)


def process_pdf(file_path, api_key):
    """Index an uploaded PDF and build the retrieval QA chain used for audits.

    The PDF is loaded, split into overlapping chunks, embedded into a fresh
    Chroma collection and wrapped in a ``RetrievalQA`` chain. On success the
    module-level ``vectorstore`` and ``qa_chain`` globals are replaced
    together; on failure they are left as they were.

    Args:
        file_path: Path of the uploaded PDF, an object exposing that path as
            ``.name``, or ``None``/empty when nothing was uploaded.
        api_key: OpenAI API key entered by the user.

    Returns:
        A human-readable status message. Failures are reported in the
        message instead of being raised, so the UI can display them.
    """
    import logging
    import uuid

    global vectorstore, qa_chain

    # Pasted keys often carry stray whitespace, which would fail authentication.
    api_key = (api_key or "").strip()
    if not api_key:
        return "⚠️ Error: Please enter your OpenAI API Key first."
    if not file_path:
        return "⚠️ Error: Please upload a PDF document first."

    # NOTE: process-wide setting; the OpenAI clients created below read it.
    os.environ["OPENAI_API_KEY"] = api_key

    # Accept either a plain path or a file-like wrapper that carries the path
    # in ``.name``.
    pdf_path = getattr(file_path, "name", file_path)

    try:
        # 1. Load PDF
        docs = PyPDFLoader(pdf_path).load()

        # 2. Split Text
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = splitter.split_documents(docs)
        if not splits:
            # E.g. a scanned PDF without a text layer: nothing to embed.
            return "❌ Error processing document: no extractable text was found in the PDF."

        # 3. Embed & Store (Chroma)
        # Use a unique collection per upload. Otherwise from_documents() reuses
        # the default collection name, and with a shared in-process client
        # chunks from earlier uploads could be mixed into retrieval.
        new_store = Chroma.from_documents(
            documents=splits,
            embedding=OpenAIEmbeddings(),
            collection_name=f"veritas-{uuid.uuid4().hex}",
        )

        # 4. Create Retrieval Chain with "Auditor" Persona
        llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
        new_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=new_store.as_retriever(),
            chain_type_kwargs={"prompt": PromptTemplate.from_template(_AUDIT_TEMPLATE)},
        )
    except Exception as e:
        return f"❌ Error processing document: {e}"

    # Publish the new state only after every step succeeded, so a failed
    # upload never leaves a store/chain pair that do not belong together.
    stale_store = vectorstore
    vectorstore, qa_chain = new_store, new_chain

    if stale_store is not None:
        # Best-effort cleanup: the new auditor is already live, so a failed
        # delete must not turn a successful upload into an error.
        try:
            stale_store.delete_collection()
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not delete the previous Chroma collection", exc_info=True
            )

    return "✅ Document Processed Successfully. The Veritas Auditor is ready."
77
+
78
def audit_query(query):
    """Answer an audit question against the currently indexed document.

    Args:
        query: The question typed by the user; may be ``None`` or blank.

    Returns:
        The chain's ``result`` text, or a human-readable message when no
        document has been processed yet, the query is blank, or the chain
        raised an error.
    """
    if qa_chain is None:
        return "⚠️ Please upload a document first."

    # Do not spend an LLM call on an empty question.
    question = (query or "").strip()
    if not question:
        return "⚠️ Please enter an audit query."

    try:
        response = qa_chain.invoke({"query": question})
        return response['result']
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {e}"
88
+
89
# --- GRADIO INTERFACE ---
# Two-column page: setup controls on the left, question/answer on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) as demo:

    # Page header.
    gr.Markdown(
        """
        # 🛡️ Veritas: AI Compliance Auditor
        ### Automated RAG Hallucination Detection for Financial Documentation
        """
    )

    with gr.Row():
        # Left column: API key, PDF upload, and indexing status.
        with gr.Column(scale=1):
            api_input = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
            file_input = gr.File(label="Upload Financial Report (PDF)", file_types=[".pdf"])
            upload_btn = gr.Button("Initialize Auditor", variant="primary")
            status_output = gr.Textbox(label="System Status", interactive=False)

        # Right column: the audit question and the model's verdict.
        with gr.Column(scale=2):
            query_input = gr.Textbox(label="Audit Query")
            audit_btn = gr.Button("Run Audit Check")
            response_output = gr.Textbox(label="Auditor Verdict", lines=10)

    # Button Actions
    # Indexing: uploaded file + API key in, status message out.
    upload_btn.click(fn=process_pdf, inputs=[file_input, api_input], outputs=status_output)

    # Auditing: question in, verdict text out.
    audit_btn.click(fn=audit_query, inputs=query_input, outputs=response_output)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()