AliceRolan committed on
Commit
33789e2
·
verified ·
1 Parent(s): 5fc8242

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -26
app.py CHANGED
@@ -14,6 +14,9 @@ from langchain.schema.runnable import RunnablePassthrough
14
  from langchain.schema.output_parser import StrOutputParser
15
  from langchain.llms import HuggingFacePipeline
16
  import gradio as gr
 
 
 
17
 
18
 
19
  """
@@ -32,10 +35,13 @@ pdf_files = ["Apple-10K-2023.pdf", "Apple-10K-2024.pdf"]
32
  all_documents = []
33
 
34
  def preprocess_text(text):
35
- text = text.replace("\n", " ").strip()
36
- text = ' '.join(text.split()) # Remove extra spaces
 
 
 
37
  return text
38
-
39
  for pdf_path in pdf_files:
40
  loader = PyPDFLoader(pdf_path)
41
  documents = loader.load()
@@ -99,13 +105,17 @@ conversation_chain = (
99
  | StrOutputParser()
100
  )
101
 
 
 
 
 
 
102
  """### 📌 Step 8: Create a function to get the confidence score"""
103
 
104
  # Function to Get Confidence Score
105
  def get_confidence_score(question):
106
  retrieved_docs_with_scores = vectordb.similarity_search_with_score(question, k=5)
107
  max_score = max([doc[1] for doc in retrieved_docs_with_scores]) if retrieved_docs_with_scores else 0
108
- print(max_score)
109
  return min(1.0, round(max_score, 2)) # Normalize to 0-1 scale
110
 
111
 
@@ -116,11 +126,32 @@ def chat_with_rag(message, history):
116
  try:
117
  response = conversation_chain.invoke(message)
118
  confidence_score = get_confidence_score(message)
119
- formatted_response = f"**Answer:** {response}\n\n**Confidence Score:** {confidence_score:.2f}"
120
  return formatted_response
121
  except Exception as e:
122
  return f"Error: {str(e)}"
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  # Create Gradio Chatbot UI with Auto-Clearing Input
125
  demo = gr.ChatInterface(
126
  fn=chat_with_rag, # Function to generate responses
@@ -128,9 +159,9 @@ demo = gr.ChatInterface(
128
  description="Ask questions about financial reports and get AI-powered answers!",
129
  theme="soft", # Aesthetic theme
130
  examples=[
 
131
  ["What was Apple's total revenue in 2024?"],
132
  ["What are the biggest financial risks for Apple?"],
133
- ["What are the biggest challenges for Apple?"],
134
  ["What is the capital of France?"]
135
  ],
136
  submit_btn="Ask", # Customize the submit button text
@@ -138,25 +169,5 @@ demo = gr.ChatInterface(
138
  )
139
 
140
 
141
- """
142
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
143
- """
144
- # demo = gr.ChatInterface(
145
- # respond,
146
- # additional_inputs=[
147
- # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
148
- # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
149
- # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
150
- # gr.Slider(
151
- # minimum=0.1,
152
- # maximum=1.0,
153
- # value=0.95,
154
- # step=0.05,
155
- # label="Top-p (nucleus sampling)",
156
- # ),
157
- # ],
158
- # )
159
-
160
-
161
  if __name__ == "__main__":
162
  demo.launch()
 
14
  from langchain.schema.output_parser import StrOutputParser
15
  from langchain.llms import HuggingFacePipeline
16
  import gradio as gr
17
+ import re
18
+ from bs4 import BeautifulSoup
19
+ import inflection
20
 
21
 
22
  """
 
35
  all_documents = []
36
 
37
  def preprocess_text(text):
38
+ # Remove HTML tags
39
+ text = BeautifulSoup(text, "html.parser").get_text()
40
+
41
+ # Remove extra whitespace and newlines
42
+ text = re.sub(r'\s+', ' ', text).strip()
43
  return text
44
+
45
  for pdf_path in pdf_files:
46
  loader = PyPDFLoader(pdf_path)
47
  documents = loader.load()
 
105
  | StrOutputParser()
106
  )
107
 
108
+ def to_camel_case(text):
109
+ """Convert normal text to camelCase using inflection package."""
110
+ camel_text = inflection.camelize(text, uppercase_first_letter=True) # Ensure lowerCamelCase
111
+ return camel_text
112
+
113
  """### 📌 Step 8: Create a function to get the confidence score"""
114
 
115
  # Function to Get Confidence Score
116
  def get_confidence_score(question):
117
  retrieved_docs_with_scores = vectordb.similarity_search_with_score(question, k=5)
118
  max_score = max([doc[1] for doc in retrieved_docs_with_scores]) if retrieved_docs_with_scores else 0
 
119
  return min(1.0, round(max_score, 2)) # Normalize to 0-1 scale
120
 
121
 
 
126
  try:
127
  response = conversation_chain.invoke(message)
128
  confidence_score = get_confidence_score(message)
129
+ formatted_response = f"**Answer:** {to_camel_case(response)}\n\n**Confidence Score:** {confidence_score:.2f}"
130
  return formatted_response
131
  except Exception as e:
132
  return f"Error: {str(e)}"
133
 
134
+ # A relevant financial question (high-confidence).
135
+
136
+ user_input = "what are the biggest challenges for Apple?"
137
+ confidence_score = get_confidence_score(user_input)
138
+ output = conversation_chain.invoke(user_input)
139
+ print(f"📌 **Answer:** {to_camel_case(output)}\n\n**Confidence Score:** {confidence_score:.2f}")
140
+
141
+ # A relevant financial question (low-confidence).
142
+
143
+ user_input = "what was apple's Total revenue in 2023?"
144
+ confidence_score = get_confidence_score(user_input)
145
+ output = conversation_chain.invoke(user_input)
146
+ print(f"📌 **Answer:** {to_camel_case(output)}\n\n**Confidence Score:** {confidence_score:.2f}")
147
+
148
+ # An irrelevant question (e.g., "What is the capital of France?") to check system robustness.
149
+
150
+ user_input = "What is the capital of France?"
151
+ output = conversation_chain.invoke(user_input)
152
+ confidence_score = get_confidence_score(user_input)
153
+ print(f"📌 **Answer:** {to_camel_case(output)}\n\n**Confidence Score:** {confidence_score:.2f}")
154
+
155
  # Create Gradio Chatbot UI with Auto-Clearing Input
156
  demo = gr.ChatInterface(
157
  fn=chat_with_rag, # Function to generate responses
 
159
  description="Ask questions about financial reports and get AI-powered answers!",
160
  theme="soft", # Aesthetic theme
161
  examples=[
162
+ ["What are the biggest challenges for Apple?"],
163
  ["What was Apple's total revenue in 2024?"],
164
  ["What are the biggest financial risks for Apple?"],
 
165
  ["What is the capital of France?"]
166
  ],
167
  submit_btn="Ask", # Customize the submit button text
 
169
  )
170
 
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  if __name__ == "__main__":
173
  demo.launch()