prernajeet01 commited on
Commit
36d89ec
·
verified ·
1 Parent(s): 0f6b708

Delete fixed-audit-app.py

Browse files
Files changed (1) hide show
  1. fixed-audit-app.py +0 -373
fixed-audit-app.py DELETED
@@ -1,373 +0,0 @@
1
- import gradio as gr
2
- import os
3
- import tempfile
4
- import pandas as pd
5
- import boto3
6
- from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, UnstructuredPowerPointLoader, UnstructuredExcelLoader
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain_community.embeddings import OpenAIEmbeddings
9
- from langchain_community.vectorstores import FAISS
10
- from langchain.chains import RetrievalQA
11
- from langchain_aws import ChatBedrock # Updated import
12
- from langchain_openai import ChatOpenAI
13
- from langchain_ollama import OllamaLLM # Updated import
14
- import logging
15
- from huggingface_hub import HfApi
16
- from huggingface_hub.utils import RepositoryNotFoundError
17
-
18
# Set up logging
# Root-logger configuration: INFO level, timestamped "time - LEVEL - message"
# lines. Applied once at import time; no-op if a handler is already installed.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
23
-
24
def get_api_keys():
    """Read provider credentials from environment variables (HF Spaces secrets).

    Returns:
        dict: ``{"status": "success", ...key material...}`` when all required
        secrets are present, otherwise ``{"status": "error", "message": ...}``.
    """
    access_key = os.environ.get("AWS_ACCESS_KEY_ID")
    secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
    # AWS_REGION is optional; default to us-east-1 when not specified.
    region = os.environ.get("AWS_REGION", "us-east-1")
    openai_api_key = os.environ.get("OPENAI_API_KEY")

    # Empty strings count as missing, matching environment-variable semantics.
    if all([access_key, secret_key, openai_api_key]):
        return {
            "status": "success",
            "aws_access_key": access_key,
            "aws_secret_key": secret_key,
            "aws_region": region,
            "openai_key": openai_api_key
        }

    return {
        "status": "error",
        "message": "Please set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and OPENAI_API_KEY in your Hugging Face Space secrets."
    }
44
-
45
class AuditAgent:
    """LLM-backed assistant for audit Q&A and retrieval over uploaded documents.

    Wraps one of three chat backends (AWS Bedrock, OpenAI, Ollama) behind a
    uniform interface and keeps an optional FAISS vector store built from an
    uploaded document for retrieval-augmented queries.
    """

    def __init__(self, model_name, provider):
        """Initialize the backing LLM.

        Args:
            model_name: Provider-specific model identifier.
            provider: One of "bedrock", "openai", or "ollama".

        Raises:
            ValueError: If required API keys are missing, the provider is
                unknown, or backend initialization fails.
        """
        self.model_name = model_name
        self.provider = provider
        # FAISS index over the uploaded document; set by process_documents().
        self.document_store = None

        api_keys = get_api_keys()
        if api_keys["status"] == "error":
            raise ValueError(api_keys["message"])

        if provider == "bedrock":
            try:
                self.bedrock_client = boto3.client(
                    service_name="bedrock-runtime",
                    aws_access_key_id=api_keys["aws_access_key"],
                    aws_secret_access_key=api_keys["aws_secret_key"],
                    region_name=api_keys["aws_region"]
                )
                self.llm = ChatBedrock(
                    client=self.bedrock_client,
                    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
                    model_kwargs={"temperature": 0.2}
                )
            except Exception as e:
                logging.error(f"Bedrock initialization error: {str(e)}")
                raise ValueError(f"Bedrock initialization error: {str(e)}")
        elif provider == "openai":
            self.llm = ChatOpenAI(
                model_name=model_name,
                openai_api_key=api_keys["openai_key"],
                temperature=0.2
            )
        elif provider == "ollama":
            try:
                self.llm = OllamaLLM(model=model_name)
            except Exception as e:
                raise ValueError(f"Failed to initialize Ollama model: {str(e)}")
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    def process_query(self, query):
        """Process a general query or numerical problem.

        Args:
            query: Free-text question; whitespace-only input is rejected.

        Returns:
            str: The model's answer, or an error message on failure.
        """
        if not query.strip():
            return "Please provide a non-empty query."

        system_prompt = """You are an expert auditor assistant. Provide clear, detailed responses to audit-related queries.
For numerical problems, show your calculations step by step. Always consider relevant accounting standards and auditing principles."""

        try:
            if self.provider == "bedrock":
                # ChatBedrock returns a message object; unwrap .content.
                response = self.llm.invoke(
                    f"{system_prompt}\n\nUser: {query}\nAssistant:"
                )
                return response.content
            elif self.provider == "openai":
                response = self.llm.invoke(
                    [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": query}
                    ]
                )
                return response.content
            else:  # Ollama LLMs return a plain string.
                full_prompt = f"{system_prompt}\n\nUser: {query}\nAssistant:"
                response = self.llm.invoke(full_prompt)
                return response
        except Exception as e:
            return f"Error processing query: {str(e)}"

    def process_documents(self, file):
        """Process an uploaded document and build the FAISS vector store.

        Accepts either a file-like object (Gradio binary upload) or a plain
        path string (Gradio "filepath" mode) — presumably always one of the
        two; confirm against the gr.File configuration.

        Args:
            file: Uploaded file object with ``.name``/``.read()``, or a path.

        Returns:
            str: Success message, or a human-readable error string.
        """
        if not file:
            return "Please upload a file"

        try:
            # Gradio may hand us a path string or a file-like object.
            source_name = file if isinstance(file, str) else file.name

            # Validate the extension before doing any filesystem work.
            file_ext = os.path.splitext(source_name.lower())[1]
            supported_exts = ['.pdf', '.docx', '.pptx', '.xlsx', '.xls']
            if file_ext not in supported_exts:
                return f"Unsupported file type. Please upload one of: {', '.join(supported_exts)}"

            # BUG FIX: check the key lookup up front; the old code indexed
            # api_keys["openai_key"] unconditionally and raised KeyError
            # when secrets were missing.
            api_keys = get_api_keys()
            if api_keys["status"] == "error":
                return api_keys["message"]

            documents = []
            temp_dir = tempfile.mkdtemp()
            # BUG FIX: file.name from Gradio is typically an absolute path;
            # os.path.join(temp_dir, <absolute path>) discards temp_dir, so
            # we take only the basename.
            temp_path = os.path.join(temp_dir, os.path.basename(source_name))
            try:
                # Materialize the upload inside the temp directory.
                if isinstance(file, str):
                    with open(file, 'rb') as src, open(temp_path, 'wb') as dst:
                        dst.write(src.read())
                else:
                    with open(temp_path, 'wb') as f:
                        f.write(file.read())

                # Dispatch to the loader matching the (validated) extension.
                loaders = {
                    '.pdf': PyPDFLoader,
                    '.docx': Docx2txtLoader,
                    '.pptx': UnstructuredPowerPointLoader,
                    '.xlsx': UnstructuredExcelLoader,
                    '.xls': UnstructuredExcelLoader,
                }
                loader = loaders[file_ext](temp_path)
                documents.extend(loader.load())
            finally:
                # BUG FIX: cleanup now runs even when the loader raises;
                # the old code leaked the temp dir on any exception.
                if os.path.exists(temp_path):
                    os.remove(temp_path)
                os.rmdir(temp_dir)

            # Chunk for embedding; 200-char overlap preserves context across
            # chunk boundaries.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200
            )
            splits = text_splitter.split_documents(documents)

            embeddings = OpenAIEmbeddings(openai_api_key=api_keys["openai_key"])
            self.document_store = FAISS.from_documents(splits, embeddings)

            return "Document processed successfully"
        except Exception as e:
            return f"Error processing document: {str(e)}"

    def query_documents(self, query):
        """Answer a question using the previously processed document.

        Args:
            query: Question to answer against the vector store.

        Returns:
            str: Answer text with an appended source list, or an error string.
        """
        if not self.document_store:
            return "Please upload and process documents first"

        try:
            qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.document_store.as_retriever(),
                return_source_documents=True
            )

            # .invoke() replaces the deprecated Chain.__call__ style.
            response = qa_chain.invoke({"query": query})

            result = response['result']
            source_docs = response.get('source_documents', [])

            if source_docs:
                result += "\n\nSources:\n"
                for i, doc in enumerate(source_docs, 1):
                    result += f"{i}. {doc.metadata.get('source', 'Unknown source')}\n"

            return result
        except Exception as e:
            return f"Error querying documents: {str(e)}"
201
-
202
# Available LLM configurations
# Maps a UI-facing model id to:
#   name        - the provider-specific model identifier passed to AuditAgent
#   provider    - backend selector: "bedrock" | "openai" | "ollama"
#   description - short label shown next to the model in the UI
llm_configs = {
    "claude-3-sonnet": {
        "name": "anthropic.claude-3-sonnet-20240229-v1:0",
        "provider": "bedrock",
        "description": "Balanced performance (AWS Bedrock)"
    },
    "gpt-4": {
        "name": "gpt-4",
        "provider": "openai",
        "description": "Advanced reasoning"
    },
    "gpt-3.5-turbo": {
        "name": "gpt-3.5-turbo",
        "provider": "openai",
        "description": "Fast responses"
    },
    "openorca-mini": {
        "name": "openorca-mini",
        "provider": "ollama",
        "description": "Local lightweight model"
    }
}
225
-
226
def create_interface():
    """Build and return the Gradio Blocks UI for the audit copilot.

    Shows a configuration-error page when required API keys are missing;
    otherwise wires up chat, numerical-problem, and document-query tabs.
    Agents are created lazily, once per model, the first time they are used.

    Returns:
        gr.Blocks: The assembled (unlaunched) interface.
    """
    # Check API keys first so a misconfigured Space gets a helpful page.
    api_keys = get_api_keys()
    if api_keys["status"] == "error":
        with gr.Blocks(theme=gr.themes.Base()) as demo:
            gr.Markdown("# ⚠️ Configuration Error")
            gr.Markdown(api_keys["message"])
            gr.Markdown("""
            To set up your Hugging Face Space:
            1. Go to your Space's Settings
            2. Add your API keys as secrets:
            - AWS_ACCESS_KEY_ID
            - AWS_SECRET_ACCESS_KEY
            - AWS_REGION
            - OPENAI_API_KEY
            3. Restart your Space
            """)
        return demo

    # Lazily initialized agents, keyed by model id, to avoid startup errors.
    audit_agents = {}

    def get_or_initialize_agent(model_name):
        # Create the agent on first use; report failures via logging
        # (the old imperative component.update(...) calls were no-ops).
        if model_name not in audit_agents:
            try:
                logging.info("Initializing %s...", model_name)
                config = llm_configs[model_name]
                audit_agents[model_name] = AuditAgent(config["name"], config["provider"])
                logging.info("%s initialized successfully", model_name)
            except Exception as e:
                logging.error("Error initializing %s: %s", model_name, e)
                return None
        return audit_agents[model_name]

    def run_with_agent(model_name, action):
        # Shared guard used by every handler below.
        agent = get_or_initialize_agent(model_name)
        if not agent:
            return f"Could not initialize {model_name}. Please check logs for details."
        return action(agent)

    with gr.Blocks(theme=gr.themes.Base()) as demo:
        gr.Markdown("# 🔍 Amy - Your Audit Copilot")

        with gr.Row():
            with gr.Column(scale=1):
                file_upload = gr.File(
                    label="Upload Audit Documents",
                    file_types=["pdf", "docx", "pptx", "xlsx", "xls"]
                )

                # BUG FIX: gr.Tab has no `is_selected` attribute, so the old
                # tab-based picker silently always used the default model.
                # A Radio component reliably reports the user's selection and
                # is passed to every handler as an input.
                model_selector = gr.Radio(
                    choices=list(llm_configs),
                    value="claude-3-sonnet",
                    label="Model",
                    info=" | ".join(
                        f"{mid}: {cfg['description']}" for mid, cfg in llm_configs.items()
                    ),
                )

            with gr.Tabs():
                with gr.Tab("💬 General Chat"):
                    chat_input = gr.Textbox(
                        lines=3,
                        label="Ask your audit question",
                        placeholder="Enter your question here..."
                    )
                    chat_button = gr.Button("Send")
                    chat_output = gr.Markdown(label="Response")

                with gr.Tab("🔢 Numerical Problem"):
                    problem_input = gr.Textbox(
                        lines=5,
                        label="Describe the Problem",
                        placeholder="Enter your numerical audit problem..."
                    )
                    solve_button = gr.Button("Solve")
                    solution_output = gr.Markdown(label="Solution")

                with gr.Tab("📑 Document Query"):
                    query_input = gr.Textbox(
                        lines=3,
                        label="Query Documents",
                        placeholder="Ask about your uploaded documents..."
                    )
                    query_button = gr.Button("Query")
                    query_output = gr.Markdown(label="Response")

        # Status indicator, updated through handler outputs (not imperatively).
        status_message = gr.Textbox(label="Status", value="Ready")

        def handle_chat(model_name, query):
            return run_with_agent(model_name, lambda a: a.process_query(query))

        def handle_problem(model_name, problem):
            return run_with_agent(model_name, lambda a: a.process_query(problem))

        def handle_file_upload(model_name, file):
            return run_with_agent(model_name, lambda a: a.process_documents(file))

        def handle_query(model_name, query):
            return run_with_agent(model_name, lambda a: a.query_documents(query))

        # Event wiring: the model selector is an input to every handler.
        chat_button.click(
            handle_chat,
            inputs=[model_selector, chat_input],
            outputs=[chat_output]
        )

        solve_button.click(
            handle_problem,
            inputs=[model_selector, problem_input],
            outputs=[solution_output]
        )

        file_upload.upload(
            handle_file_upload,
            inputs=[model_selector, file_upload],
            outputs=[status_message]
        )

        query_button.click(
            handle_query,
            inputs=[model_selector, query_input],
            outputs=[query_output]
        )

    return demo
370
-
371
if __name__ == "__main__":
    # Build the UI and serve it; share links stay disabled on HF Spaces.
    create_interface().launch(share=False)