neerajkalyank committed on
Commit
90c8b76
·
verified ·
1 Parent(s): 9df19db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -121
app.py CHANGED
@@ -7,16 +7,13 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import faiss
8
  from simple_salesforce import Salesforce
9
  from dotenv import load_dotenv
10
- import json
11
- import zipfile
12
- from pathlib import Path
13
 
14
  # Setup logging
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
  # Load environment variables from .env file
19
- load_dotenv() # Load the .env file
20
 
21
  # Get the Salesforce credentials from environment variables
22
  sf_username = os.getenv("SF_USERNAME")
@@ -42,23 +39,15 @@ except Exception as e:
42
  logger.error(f"❌ Salesforce connection failed: {str(e)}")
43
  raise
44
 
45
- # --- Extract zip files and read documents ---
46
- def extract_zip(zip_path, extract_to):
47
- try:
48
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
49
- zip_ref.extractall(extract_to)
50
- logger.info(f"Extracted {zip_path} to {extract_to}")
51
- except Exception as e:
52
- logger.error(f"Failed to extract {zip_path}: {str(e)}")
53
- raise
54
-
55
- def load_documents(folder_path):
56
- documents = []
57
- sources = []
58
- for file in Path(folder_path).rglob("*.txt"):
59
- text = file.read_text(encoding="utf-8", errors="ignore")
60
- documents.append(text)
61
- sources.append(file.name)
62
  return documents, sources
63
 
64
  # --- Chunking ---
@@ -68,70 +57,52 @@ text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
68
  model = SentenceTransformer("all-MiniLM-L6-v2")
69
 
70
  # --- Preprocessing ---
71
- data_dir = Path("./data")
72
- data_dir.mkdir(exist_ok=True)
73
-
74
- doc_folders = [
75
- ("Company_Policies.zip", "Company_Policies"),
76
- ("HR_Policies.zip", "Hr_Policies"),
77
- ("Contract_Clauses.zip", "Contract_Clauses")
78
- ]
79
-
80
  all_chunks = []
81
  metadata = []
82
 
83
- for zip_name, folder in doc_folders:
84
- zip_path = Path(zip_name)
85
- if not zip_path.exists():
86
- logger.error(f"Zip file {zip_name} not found")
87
- raise FileNotFoundError(f"Zip file {zip_name} not found")
88
- extract_path = data_dir / folder
89
- extract_path.mkdir(exist_ok=True)
90
- extract_zip(zip_path, extract_path)
91
- docs, sources = load_documents(extract_path)
92
  if not docs:
93
- logger.error(f"No documents found in {extract_path}")
94
- raise ValueError(f"No documents found in {extract_path}")
95
  for doc, src in zip(docs, sources):
96
  chunks = text_splitter.split_text(doc)
97
  all_chunks.extend(chunks)
98
- src_url = f"https://company.com/{folder}/{src}"
99
  metadata.extend([src_url] * len(chunks))
 
 
 
100
 
101
  # --- Embeddings + FAISS index ---
102
- embeddings = model.encode(all_chunks)
103
- index = faiss.IndexFlatL2(embeddings.shape[1])
104
- index.add(np.array(embeddings))
105
- logger.info("FAISS index built successfully")
 
 
 
 
106
 
107
  # --- Create Record in Salesforce ---
108
  def create_salesforce_record(query, answer, confidence_percentage, source_link):
109
  try:
110
- # Convert the confidence_percentage to Python float (to avoid numpy float32)
111
  confidence_percentage = float(confidence_percentage)
112
-
113
- # Data with correctly mapped field names
114
  data = {
115
- "Query__c": query, # Field for User Query
116
- "Answer__c": answer, # Field for Answer
117
- "Confidence_Percentage__c": confidence_percentage, # Field for Confidence Score
118
- "Document_link__c": source_link, # Field for Document Link
119
  }
120
-
121
- # Creating the record in Salesforce
122
  response = sf.chat_query_log__c.create(data)
123
-
124
- # Check if record was created successfully
125
- if 'id' in response: # If the response contains an 'id', the record is created successfully
126
  record_id = response['id']
127
  logger.info(f"✅ Record created successfully in Salesforce with ID: {record_id}")
128
- return record_id # Return the Salesforce record ID
129
  else:
130
- # Log the failure response
131
  logger.error(f"❌ Failed to create Salesforce record. Response: {response}")
132
  return None
133
  except Exception as e:
134
- # Log any error during record creation
135
  logger.error(f"Error creating Salesforce record: {str(e)}")
136
  return None
137
 
@@ -145,81 +116,61 @@ def answer_query(query):
145
  top_sources = [metadata[i] for i in I[0]]
146
  distances = D[0]
147
 
148
- relevant_chunks = [
149
- chunk for chunk, dist in zip(top_chunks, distances) if dist < 0.8
150
- ]
151
- relevant_sources = [
152
- src for src, dist in zip(top_sources, distances) if dist < 0.8
153
- ]
154
 
155
  if not relevant_chunks:
156
- return "No relevant information found.", "Confidence: 0%", "Source Link: None"
157
 
158
  answer = relevant_chunks[0].strip()
159
  min_distance = min(distances)
160
  confidence_percentage = max(0, 100 - (min_distance * 100))
161
  source_link = relevant_sources[0] if relevant_sources else "None"
162
 
163
- # Create Salesforce record for the query response
164
  record_id = create_salesforce_record(query, answer, confidence_percentage, source_link)
165
 
166
- if record_id:
167
- return (
168
- answer,
169
- f"Confidence: {confidence_percentage:.2f}%",
170
- f"Source Link: {source_link}",
171
- f"Salesforce Record ID: {record_id}" # Display the Salesforce record ID
172
- )
173
- else:
174
- return (
175
- answer,
176
- f"Confidence: {confidence_percentage:.2f}%",
177
- f"Source Link: {source_link}",
178
- "Failed to create record in Salesforce"
179
- )
180
  except Exception as e:
181
  logger.error(f"Error in answer_query: {str(e)}")
182
- return f"Error: {str(e)}", "", "", ""
183
 
184
- # --- Gradio Chatbot UI ---
185
- def process_question(q):
186
- if not q.strip():
187
- return "Please enter a question.", "", ""
188
-
189
- answer, confidence, source, record_id = answer_query(q)
190
- return answer, confidence, source, record_id
 
 
 
 
 
 
191
 
192
- # --- Chatbot UI Design (Chat bubble style) ---
193
  with gr.Blocks(title="Company Documents Q&A Chatbot", theme=gr.themes.Soft()) as demo:
194
  gr.Markdown("## 📚 Company Documents Q&A Chatbot")
195
-
196
  with gr.Row():
197
- with gr.Column(scale=3):
198
- question = gr.Textbox(
199
- label="Ask a Question",
200
- placeholder="What are the conditions for permanent employment status?",
201
- lines=1,
202
- interactive=True
203
- )
204
- with gr.Column(scale=1):
205
- submit_btn = gr.Button("Submit", variant="primary")
 
 
 
 
 
206
 
207
- with gr.Row():
208
- with gr.Column():
209
- # Chatbot styled for a more modern chat look with bubbles
210
- output_area = gr.HTML(
211
- label="Chat",
212
- elem_id="chatbox",
213
- value="""
214
- <div style="padding: 10px; background-color: #f5f5f5; border-radius: 10px;">
215
- <div style="padding: 5px 10px; background-color: #dfe1e6; border-radius: 10px; margin-bottom: 10px;">
216
- <b>User:</b> <span id="user-message"> </span>
217
- </div>
218
- <div style="padding: 5px 10px; background-color: #007bff; color: white; border-radius: 10px;">
219
- <b>Bot:</b> <span id="bot-message"> </span>
220
- </div>
221
- </div>""")
222
-
223
- submit_btn.click(fn=process_question, inputs=question, outputs=[output_area])
224
-
225
- demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
7
  import faiss
8
  from simple_salesforce import Salesforce
9
  from dotenv import load_dotenv
 
 
 
10
 
11
  # Setup logging
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
  # Load environment variables from .env file
16
+ load_dotenv()
17
 
18
  # Get the Salesforce credentials from environment variables
19
  sf_username = os.getenv("SF_USERNAME")
 
39
  logger.error(f"❌ Salesforce connection failed: {str(e)}")
40
  raise
41
 
42
+ # --- Simulate document loading (replace with actual document loading in Hugging Face) ---
43
def load_documents():
    """Return the built-in sample corpus as two parallel lists.

    This is a stand-in for real document loading: the texts are hard-coded
    so the app can start without any files on disk. Replace with actual
    data loading when real documents are available.

    Returns:
        tuple: ``(documents, sources)`` where ``documents[i]`` is the text
        of a document and ``sources[i]`` is the file name it is attributed to.
    """
    # Each entry keeps a text next to its source name so the two lists
    # built below cannot drift out of step.
    sample_corpus = (
        (
            "policy1.txt",
            "Permanent employment status is granted after 6 months of continuous employment with satisfactory performance.",
        ),
        (
            "hr1.txt",
            "HR policies include 20 days of paid leave annually and mandatory diversity training.",
        ),
        (
            "contract1.txt",
            "Contract clauses require a 30-day notice period for termination.",
        ),
    )
    documents = [text for _, text in sample_corpus]
    sources = [name for name, _ in sample_corpus]
    return documents, sources
52
 
53
  # --- Chunking ---
 
57
  model = SentenceTransformer("all-MiniLM-L6-v2")
58
 
59
  # --- Preprocessing ---
 
 
 
 
 
 
 
 
 
60
  all_chunks = []
61
  metadata = []
62
 
63
# Split every document into chunks and record, per chunk, the URL of the
# document it came from (all_chunks[i] <-> metadata[i]).
try:
    docs, sources = load_documents()
    if not docs:
        logger.error("No documents found")
        raise ValueError("No documents found")
    # zip() would silently drop the surplus entries if the two lists ever
    # disagree in length, mis-attributing or losing documents.
    if len(docs) != len(sources):
        raise ValueError(
            f"Got {len(docs)} documents but {len(sources)} sources"
        )
    for doc, src in zip(docs, sources):
        chunks = text_splitter.split_text(doc)
        all_chunks.extend(chunks)
        src_url = f"https://company.com/documents/{src}"
        metadata.extend([src_url] * len(chunks))
except Exception as e:
    logger.error(f"Error loading documents: {str(e)}")
    raise

# --- Embeddings + FAISS index ---
try:
    # Without any chunk there is no embedding dimension to size the index
    # with; fail with a clear message instead of an IndexError on .shape[1].
    if not all_chunks:
        raise ValueError("No text chunks to index")
    # FAISS expects a float32 matrix; asarray avoids a copy when the encoder
    # already returns one.
    embeddings = np.asarray(model.encode(all_chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    logger.info("FAISS index built successfully")
except Exception as e:
    logger.error(f"Error building FAISS index: {str(e)}")
    raise
86
 
87
  # --- Create Record in Salesforce ---
88
  def create_salesforce_record(query, answer, confidence_percentage, source_link):
89
  try:
 
90
  confidence_percentage = float(confidence_percentage)
 
 
91
  data = {
92
+ "Query__c": query,
93
+ "Answer__c": answer,
94
+ "Confidence_Percentage__c": confidence_percentage,
95
+ "Document_link__c": source_link,
96
  }
 
 
97
  response = sf.chat_query_log__c.create(data)
98
+ if 'id' in response:
 
 
99
  record_id = response['id']
100
  logger.info(f"✅ Record created successfully in Salesforce with ID: {record_id}")
101
+ return record_id
102
  else:
 
103
  logger.error(f"❌ Failed to create Salesforce record. Response: {response}")
104
  return None
105
  except Exception as e:
 
106
  logger.error(f"Error creating Salesforce record: {str(e)}")
107
  return None
108
 
 
116
  top_sources = [metadata[i] for i in I[0]]
117
  distances = D[0]
118
 
119
+ relevant_chunks = [chunk for chunk, dist in zip(top_chunks, distances) if dist < 0.8]
120
+ relevant_sources = [src for src, dist in zip(top_sources, distances) if dist < 0.8]
 
 
 
 
121
 
122
  if not relevant_chunks:
123
+ return "No relevant information found.", "Confidence: 0%", "Source Link: None", None
124
 
125
  answer = relevant_chunks[0].strip()
126
  min_distance = min(distances)
127
  confidence_percentage = max(0, 100 - (min_distance * 100))
128
  source_link = relevant_sources[0] if relevant_sources else "None"
129
 
 
130
  record_id = create_salesforce_record(query, answer, confidence_percentage, source_link)
131
 
132
+ return (
133
+ answer,
134
+ f"Confidence: {confidence_percentage:.2f}%",
135
+ f"Source Link: {source_link}",
136
+ record_id
137
+ )
 
 
 
 
 
 
 
 
138
  except Exception as e:
139
  logger.error(f"Error in answer_query: {str(e)}")
140
+ return f"Error: {str(e)}", "", "", None
141
 
142
+ # --- Gradio Chatbot Function ---
143
def process_question(user_input, history):
    """Answer one chat turn and return the updated chat history.

    Args:
        user_input: Text typed by the user; ``None`` is treated like an
            empty question.
        history: Existing chat history as a sequence of
            ``[user_message, bot_message]`` pairs; ``None`` is treated as an
            empty history.

    Returns:
        list: A new list made of the previous history plus one
        ``[user_message, bot_message]`` pair. The list passed in is never
        mutated.
    """
    # Be defensive about None: `None + [...]` and `None.strip()` would raise
    # before the user ever sees a reply.
    history = list(history) if history else []

    if user_input is None or not user_input.strip():
        return history + [[user_input or "", "Please enter a valid question."]]

    answer, confidence, source, record_id = answer_query(user_input)
    response = f"{answer}\n\n{confidence}\n{source}"
    # NOTE(review): answer_query also yields record_id=None when it found no
    # relevant chunk or hit an error, so the failure text below can appear
    # even though no Salesforce write was attempted.
    if record_id:
        response += f"\nSalesforce Record ID: {record_id}"
    else:
        response += "\nFailed to create record in Salesforce"

    return history + [[user_input, response]]
155
 
156
+ # --- Gradio Chatbot UI ---
157
  with gr.Blocks(title="Company Documents Q&A Chatbot", theme=gr.themes.Soft()) as demo:
158
  gr.Markdown("## 📚 Company Documents Q&A Chatbot")
159
+ chatbot = gr.Chatbot(label="Chat", height=400)
160
  with gr.Row():
161
+ question = gr.Textbox(
162
+ label="Ask a Question",
163
+ placeholder="What are the conditions for permanent employment status?",
164
+ lines=1,
165
+ interactive=True
166
+ )
167
+ submit_btn = gr.Button("Submit", variant="primary")
168
+
169
+ submit_btn.click(
170
+ fn=process_question,
171
+ inputs=[question, chatbot],
172
+ outputs=chatbot,
173
+ _js="() => {const txt = document.querySelector('input[type=text]'); txt.value=''; txt.focus(); return []}"
174
+ )
175
 
176
+ demo.launch(server_name="0.0.0.0", server_port=7860)