Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,9 @@ from langchain.schema.runnable import RunnablePassthrough
|
|
| 14 |
from langchain.schema.output_parser import StrOutputParser
|
| 15 |
from langchain.llms import HuggingFacePipeline
|
| 16 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
"""
|
|
@@ -32,10 +35,13 @@ pdf_files = ["Apple-10K-2023.pdf", "Apple-10K-2024.pdf"]
|
|
| 32 |
all_documents = []
|
| 33 |
|
| 34 |
def preprocess_text(text):
|
| 35 |
-
|
| 36 |
-
text =
|
|
|
|
|
|
|
|
|
|
| 37 |
return text
|
| 38 |
-
|
| 39 |
for pdf_path in pdf_files:
|
| 40 |
loader = PyPDFLoader(pdf_path)
|
| 41 |
documents = loader.load()
|
|
@@ -99,13 +105,17 @@ conversation_chain = (
|
|
| 99 |
| StrOutputParser()
|
| 100 |
)
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
"""### π Step 8: Create a function to get the confidence score"""
|
| 103 |
|
| 104 |
# Function to Get Confidence Score
|
| 105 |
def get_confidence_score(question):
|
| 106 |
retrieved_docs_with_scores = vectordb.similarity_search_with_score(question, k=5)
|
| 107 |
max_score = max([doc[1] for doc in retrieved_docs_with_scores]) if retrieved_docs_with_scores else 0
|
| 108 |
-
print(max_score)
|
| 109 |
return min(1.0, round(max_score, 2)) # Normalize to 0-1 scale
|
| 110 |
|
| 111 |
|
|
@@ -116,11 +126,32 @@ def chat_with_rag(message, history):
|
|
| 116 |
try:
|
| 117 |
response = conversation_chain.invoke(message)
|
| 118 |
confidence_score = get_confidence_score(message)
|
| 119 |
-
formatted_response = f"**Answer:** {response}\n\n**Confidence Score:** {confidence_score:.2f}"
|
| 120 |
return formatted_response
|
| 121 |
except Exception as e:
|
| 122 |
return f"Error: {str(e)}"
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
# Create Gradio Chatbot UI with Auto-Clearing Input
|
| 125 |
demo = gr.ChatInterface(
|
| 126 |
fn=chat_with_rag, # Function to generate responses
|
|
@@ -128,9 +159,9 @@ demo = gr.ChatInterface(
|
|
| 128 |
description="Ask questions about financial reports and get AI-powered answers!",
|
| 129 |
theme="soft", # Aesthetic theme
|
| 130 |
examples=[
|
|
|
|
| 131 |
["What was Apple's total revenue in 2024?"],
|
| 132 |
["What are the biggest financial risks for Apple?"],
|
| 133 |
-
["What are the biggest challenges for Apple?"],
|
| 134 |
["What is the capital of France?"]
|
| 135 |
],
|
| 136 |
submit_btn="Ask", # Customize the submit button text
|
|
@@ -138,25 +169,5 @@ demo = gr.ChatInterface(
|
|
| 138 |
)
|
| 139 |
|
| 140 |
|
| 141 |
-
"""
|
| 142 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
| 143 |
-
"""
|
| 144 |
-
# demo = gr.ChatInterface(
|
| 145 |
-
# respond,
|
| 146 |
-
# additional_inputs=[
|
| 147 |
-
# gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
| 148 |
-
# gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 149 |
-
# gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 150 |
-
# gr.Slider(
|
| 151 |
-
# minimum=0.1,
|
| 152 |
-
# maximum=1.0,
|
| 153 |
-
# value=0.95,
|
| 154 |
-
# step=0.05,
|
| 155 |
-
# label="Top-p (nucleus sampling)",
|
| 156 |
-
# ),
|
| 157 |
-
# ],
|
| 158 |
-
# )
|
| 159 |
-
|
| 160 |
-
|
| 161 |
if __name__ == "__main__":
|
| 162 |
demo.launch()
|
|
|
|
| 14 |
from langchain.schema.output_parser import StrOutputParser
|
| 15 |
from langchain.llms import HuggingFacePipeline
|
| 16 |
import gradio as gr
|
| 17 |
+
import re
|
| 18 |
+
from bs4 import BeautifulSoup
|
| 19 |
+
import inflection
|
| 20 |
|
| 21 |
|
| 22 |
"""
|
|
|
|
| 35 |
all_documents = []
|
| 36 |
|
| 37 |
def preprocess_text(text):
    """Strip HTML markup from *text* and collapse all whitespace.

    Returns the cleaned plain-text string.
    """
    # Drop any HTML tags, keeping only the human-visible text.
    plain = BeautifulSoup(text, "html.parser").get_text()
    # Collapse runs of spaces/newlines/tabs into single spaces and trim ends.
    return re.sub(r'\s+', ' ', plain).strip()
|
| 44 |
+
|
| 45 |
for pdf_path in pdf_files:
|
| 46 |
loader = PyPDFLoader(pdf_path)
|
| 47 |
documents = loader.load()
|
|
|
|
| 105 |
| StrOutputParser()
|
| 106 |
)
|
| 107 |
|
| 108 |
+
def to_camel_case(text):
    """Return *text* converted to UpperCamelCase via the inflection package.

    NOTE(review): ``uppercase_first_letter=True`` produces UpperCamelCase
    (PascalCase), not lowerCamelCase, despite what the original comment said.
    """
    return inflection.camelize(text, uppercase_first_letter=True)
|
| 112 |
+
|
| 113 |
"""### π Step 8: Create a function to get the confidence score"""
|
| 114 |
|
| 115 |
# Function to Get Confidence Score
|
| 116 |
def get_confidence_score(question):
    """Return a 0.0-1.0 confidence value for *question*.

    Takes the best (maximum) similarity score among the top-5 vector-store
    hits, rounds it to two decimals, and caps it at 1.0.
    NOTE(review): assumes the store returns higher-is-better scores — for
    distance-based stores this would be inverted; confirm against ``vectordb``.
    """
    hits = vectordb.similarity_search_with_score(question, k=5)
    best = max((score for _doc, score in hits), default=0)
    return min(1.0, round(best, 2))  # Normalize to 0-1 scale
|
| 120 |
|
| 121 |
|
|
|
|
| 126 |
try:
|
| 127 |
response = conversation_chain.invoke(message)
|
| 128 |
confidence_score = get_confidence_score(message)
|
| 129 |
+
formatted_response = f"**Answer:** {to_camel_case(response)}\n\n**Confidence Score:** {confidence_score:.2f}"
|
| 130 |
return formatted_response
|
| 131 |
except Exception as e:
|
| 132 |
return f"Error: {str(e)}"
|
| 133 |
|
| 134 |
+
# Smoke-test the RAG chain on three sample queries:
#   1. a relevant financial question (expected high confidence),
#   2. a relevant financial question (expected low confidence),
#   3. an irrelevant question, to check system robustness.
for user_input in (
    "what are the biggest challenges for Apple?",
    "what was apple's Total revenue in 2023?",
    "What is the capital of France?",
):
    confidence_score = get_confidence_score(user_input)
    output = conversation_chain.invoke(user_input)
    print(f"π **Answer:** {to_camel_case(output)}\n\n**Confidence Score:** {confidence_score:.2f}")
|
| 154 |
+
|
| 155 |
# Create Gradio Chatbot UI with Auto-Clearing Input
|
| 156 |
demo = gr.ChatInterface(
|
| 157 |
fn=chat_with_rag, # Function to generate responses
|
|
|
|
| 159 |
description="Ask questions about financial reports and get AI-powered answers!",
|
| 160 |
theme="soft", # Aesthetic theme
|
| 161 |
examples=[
|
| 162 |
+
["What are the biggest challenges for Apple?"],
|
| 163 |
["What was Apple's total revenue in 2024?"],
|
| 164 |
["What are the biggest financial risks for Apple?"],
|
|
|
|
| 165 |
["What is the capital of France?"]
|
| 166 |
],
|
| 167 |
submit_btn="Ask", # Customize the submit button text
|
|
|
|
| 169 |
)
|
| 170 |
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
# Entry point: launch the Gradio ChatInterface server only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()
|