dcata004 committed on
Commit
06922f2
·
verified ·
1 Parent(s): f9e340b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -45
app.py CHANGED
@@ -2,11 +2,12 @@ import os
2
  import sys
3
 
4
  # --- HUGGING FACE CHROMADB FIX ---
5
- # This is critical for deployment on HF Spaces to prevent SQLite errors
6
  try:
7
  __import__('pysqlite3')
8
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
9
  except ImportError:
 
10
  pass
11
  # ---------------------------------
12
 
@@ -15,10 +16,12 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
15
  from langchain_community.document_loaders import PyPDFLoader
16
  from langchain_text_splitters import RecursiveCharacterTextSplitter
17
  from langchain_chroma import Chroma
 
 
18
  from langchain.chains import RetrievalQA
19
  from langchain.prompts import PromptTemplate
20
 
21
- # Global variables to store state (sufficient for demo purposes)
22
  vectorstore = None
23
  qa_chain = None
24
 
@@ -39,21 +42,21 @@ def process_pdf(file_path, api_key):
39
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
40
  splits = text_splitter.split_documents(docs)
41
 
42
- # 3. Embed & Store (Chroma)
 
43
  embeddings = OpenAIEmbeddings()
44
  vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
45
 
46
- # 4. Create Retrieval Chain with "Auditor" Persona
47
  llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
48
 
49
- # Custom Prompt to enforce "Audit" style behavior
50
  audit_template = """You are Veritas, an AI Compliance Auditor.
51
  Use the following pieces of context to answer the question at the end.
52
 
53
  RULES:
54
- 1. If the answer is in the text, state it clearly and reference the context.
55
  2. If the answer is NOT in the text, you must explicitly state: "FAIL: Information not found in source document."
56
- 3. Do not hallucinate or guess.
57
 
58
  Context: {context}
59
 
@@ -70,10 +73,10 @@ def process_pdf(file_path, api_key):
70
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
71
  )
72
 
73
- return "✅ Document Processed Successfully. The Veritas Auditor is ready."
74
 
75
  except Exception as e:
76
- return f"❌ Error processing document: {str(e)}"
77
 
78
  def audit_query(query):
79
  global qa_chain
@@ -86,47 +89,24 @@ def audit_query(query):
86
  except Exception as e:
87
  return f"Error: {str(e)}"
88
 
89
- # --- GRADIO INTERFACE ---
90
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) as demo:
91
-
92
- gr.Markdown(
93
- """
94
- # 🛡️ Veritas: AI Compliance Auditor
95
- ### Automated RAG Hallucination Detection for Financial Documentation
96
- """
97
- )
98
 
99
  with gr.Row():
100
- with gr.Column(scale=1):
101
- api_input = gr.Textbox(
102
- label="OpenAI API Key",
103
- type="password",
104
- placeholder="sk-..."
105
- )
106
- file_input = gr.File(
107
- label="Upload Financial Report (PDF)",
108
- file_types=[".pdf"]
109
- )
110
- upload_btn = gr.Button("Initialize Auditor", variant="primary")
111
- status_output = gr.Textbox(label="System Status", interactive=False)
112
 
113
- with gr.Column(scale=2):
114
  query_input = gr.Textbox(label="Audit Query")
115
- audit_btn = gr.Button("Run Audit Check")
116
- response_output = gr.Textbox(label="Auditor Verdict", lines=10)
117
 
118
- # Button Actions
119
- upload_btn.click(
120
- process_pdf,
121
- inputs=[file_input, api_input],
122
- outputs=status_output
123
- )
124
-
125
- audit_btn.click(
126
- audit_query,
127
- inputs=query_input,
128
- outputs=response_output
129
- )
130
 
131
  if __name__ == "__main__":
132
  demo.launch()
 
2
  import sys
3
 
4
  # --- HUGGING FACE CHROMADB FIX ---
5
+ # Forces the use of pysqlite3-binary to avoid DB crashes
6
  try:
7
  __import__('pysqlite3')
8
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
9
  except ImportError:
10
+ # If pysqlite3 is not installed, fall back to the system sqlite3 module; ChromaDB may still fail later if that SQLite version is too old
11
  pass
12
  # ---------------------------------
13
 
 
16
  from langchain_community.document_loaders import PyPDFLoader
17
  from langchain_text_splitters import RecursiveCharacterTextSplitter
18
  from langchain_chroma import Chroma
19
+
20
+ # RetrievalQA is imported from langchain.chains (import path is unchanged by this commit)
21
  from langchain.chains import RetrievalQA
22
  from langchain.prompts import PromptTemplate
23
 
24
+ # Global variables
25
  vectorstore = None
26
  qa_chain = None
27
 
 
42
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
43
  splits = text_splitter.split_documents(docs)
44
 
45
+ # 3. Embed & Store
46
+ # NOTE: no model is passed here, so OpenAIEmbeddings falls back to its library default; pass model=... explicitly if a lighter/cheaper model is intended
47
  embeddings = OpenAIEmbeddings()
48
  vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
49
 
50
+ # 4. Create Chain
51
  llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
52
 
 
53
  audit_template = """You are Veritas, an AI Compliance Auditor.
54
  Use the following pieces of context to answer the question at the end.
55
 
56
  RULES:
57
+ 1. If the answer is in the text, state it clearly.
58
  2. If the answer is NOT in the text, you must explicitly state: "FAIL: Information not found in source document."
59
+ 3. Do not hallucinate.
60
 
61
  Context: {context}
62
 
 
73
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
74
  )
75
 
76
+ return "✅ Document Processed. Veritas is ready."
77
 
78
  except Exception as e:
79
+ return f"❌ Error: {str(e)}"
80
 
81
  def audit_query(query):
82
  global qa_chain
 
89
  except Exception as e:
90
  return f"Error: {str(e)}"
91
 
92
+ # --- INTERFACE ---
93
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
94
+ gr.Markdown("# 🛡️ Veritas: AI Compliance Auditor")
 
 
 
 
 
 
95
 
96
  with gr.Row():
97
+ with gr.Column():
98
+ api_input = gr.Textbox(label="OpenAI API Key", type="password")
99
+ file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
100
+ upload_btn = gr.Button("Initialize", variant="primary")
101
+ status = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
 
 
102
 
103
+ with gr.Column():
104
  query_input = gr.Textbox(label="Audit Query")
105
+ audit_btn = gr.Button("Run Audit")
106
+ output = gr.Textbox(label="Verdict", lines=10)
107
 
108
+ upload_btn.click(process_pdf, inputs=[file_input, api_input], outputs=status)
109
+ audit_btn.click(audit_query, inputs=query_input, outputs=output)
 
 
 
 
 
 
 
 
 
 
110
 
111
  if __name__ == "__main__":
112
  demo.launch()