foreversheikh committed on
Commit
ebc1af9
·
verified ·
1 Parent(s): 399a6ed

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ src/data/vector_stores/default_pdf_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
37
+ src/Ring_App_Documentation.pdf filter=lfs diff=lfs merge=lfs -text
src/Ring_App_Documentation.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e0a8f1ceb73caf38efb4ca3dc534b8071b41a06d32758ae9e88a65695ea171
3
+ size 4177325
src/chat_logic.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chat_logic.py
2
+
3
+ import os
4
+ import re
5
+ import warnings
6
+ from pathlib import Path
7
+ from typing import Any, Tuple, Optional, Dict
8
+
9
+ # Langchain/OpenAI imports
10
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
11
+ from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
12
+ from langchain_classic.chains import ConversationalRetrievalChain
13
+ from langchain_classic.memory import ConversationBufferMemory, ConversationSummaryBufferMemory
14
+ from langchain_community.document_loaders import PyPDFLoader
15
+ from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
16
+ from langchain_community.vectorstores import Chroma
17
+ from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder
18
+ from langchain_classic.retrievers.document_compressors import DocumentCompressorPipeline
19
+ from langchain_classic.retrievers.document_compressors import EmbeddingsFilter
20
+ from langchain_classic.retrievers import ContextualCompressionRetriever
21
+ from langchain_text_splitters import TextSplitter
22
+ from langchain_core.retrievers import BaseRetriever
23
+ from langchain_core.language_models import BaseChatModel
24
+
25
# --- Constants & Helpers ---

# On-disk location for persisted Chroma vector stores, relative to this script.
LOCAL_VECTOR_STORE_DIR = Path(__file__).resolve().parent.joinpath("data", "vector_stores")
# The default PDF ships alongside this script (src/Ring_App_Documentation.pdf).
# BUG FIX: the previous value joined an absolute Windows path ("S:\\ano_dec_pro\\...")
# onto the script directory. Path.joinpath with an absolute segment discards the base
# on Windows, and on POSIX the whole "S:\\..." string becomes one literal relative
# file name — either way the bundled PDF was never found outside the author's machine.
DEFAULT_PDF_PATH = Path(__file__).resolve().parent.joinpath("Ring_App_Documentation.pdf")
# Subdirectory name (under LOCAL_VECTOR_STORE_DIR) for the default document's store.
DEFAULT_VECTORSTORE_NAME = "default_pdf_db"
# API key seeded from the environment; the Streamlit UI lets the user override it.
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
33
+
34
+
35
def ensure_dir(p: Path) -> None:
    """Ensure directory *p* exists, creating missing parents; no-op if present."""
    os.makedirs(p, exist_ok=True)
37
+
38
def load_default_pdf():
    """Load the bundled default PDF into LangChain documents.

    Raises:
        FileNotFoundError: when no file exists at DEFAULT_PDF_PATH.
    """
    if not DEFAULT_PDF_PATH.exists():
        raise FileNotFoundError(
            f"Default PDF not found: {DEFAULT_PDF_PATH}. Please place your PDF here or update the path in chat_logic.py"
        )
    # PyPDFLoader yields one document per page.
    return PyPDFLoader(DEFAULT_PDF_PATH.as_posix()).load()
46
+
47
def split_documents(docs, chunk_size: int = 1600, chunk_overlap: int = 200):
    """Split documents into overlapping chunks for embedding."""
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(docs)
50
+
51
def select_embeddings(openai_key: str | None) -> OpenAIEmbeddings:
    """Return an OpenAI embeddings client, failing fast when no key is given."""
    if openai_key:
        return OpenAIEmbeddings(api_key=openai_key)
    raise ValueError("OPENAI_API_KEY is required.")
55
+
56
+ # --- Core RAG Components ---
57
+
58
def vectorstore_backed_retriever(vs: Chroma, search_type: str = "similarity", k: int = 16, score_threshold: float | None = None) -> BaseRetriever:
    """Build a plain retriever on top of a Chroma vector store.

    Args:
        vs: Backing Chroma store.
        search_type: Forwarded to ``Chroma.as_retriever`` (e.g. "similarity").
        k: Number of documents to fetch; omitted from search kwargs when None.
        score_threshold: Optional minimum score; omitted when None.
    """
    candidates = {"k": k, "score_threshold": score_threshold}
    # Only forward explicitly supplied options.
    search_kwargs = {name: value for name, value in candidates.items() if value is not None}
    return vs.as_retriever(search_type=search_type, search_kwargs=search_kwargs)
65
+
66
+
67
def make_compression_retriever(embeddings: OpenAIEmbeddings, base_retriever: BaseRetriever, chunk_size: int = 500, k: int = 16, similarity_threshold: float | None = None) -> ContextualCompressionRetriever:
    """Wrap *base_retriever* in a compression pipeline.

    Stage order matters: re-split -> drop near-duplicates -> keep top-k
    relevant pieces -> reorder for long-context models.
    """
    stages = [
        # Re-split retrieved documents on ". " into smaller pieces.
        CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". "),
        # Remove pieces that are near-duplicates by embedding similarity.
        EmbeddingsRedundantFilter(embeddings=embeddings),
        # Keep only the k pieces most relevant to the query.
        EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold),
        # Move the most relevant pieces toward the edges of the context.
        LongContextReorder(),
    ]
    return ContextualCompressionRetriever(
        base_compressor=DocumentCompressorPipeline(transformers=stages),
        base_retriever=base_retriever,
    )
74
+
75
+
76
def make_memory(model_name: str, openai_key: str | None):
    """Build the conversation memory for the RAG chain.

    Older turns are summarised once the buffer exceeds ``max_token_limit``
    tokens; recent messages are kept verbatim.

    NOTE(review): ``model_name`` is currently unused — the summarisation LLM
    is hard-coded to "gpt-3.5-turbo" (presumably to keep summarisation cheap
    regardless of the chat model); confirm this is intentional.

    Args:
        model_name: Chat model name (ignored here, see note).
        openai_key: OpenAI API key for the summarisation LLM.

    Returns:
        ConversationSummaryBufferMemory wired for ConversationalRetrievalChain:
        reads "question", stores "answer", exposes history as "chat_history".
    """
    # Simplified memory logic for Streamlit
    return ConversationSummaryBufferMemory(
        max_token_limit=1024,
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_key, temperature=0.1),
        return_messages=True,
        memory_key="chat_history",
        output_key="answer",
        input_key="question",
    )
86
+
87
+
88
def answer_template(language: str = "english") -> str:
    """Return the answer-LLM prompt template.

    Keeps literal ``{chat_history}``, ``{context}`` and ``{question}``
    placeholders for LangChain to fill; the target language is baked in.
    """
    header = (
        "Answer the question at the end, using only the following context (delimited by <context></context>).\n"
        "Your answer must be in the language at the end.\n"
    )
    context_block = "\n<context>\n{chat_history}\n\n{context}\n</context>\n"
    footer = "\nQuestion: {question}\n\nLanguage: " + language + ".\n"
    return header + context_block + footer
102
+
103
def build_chain(model: str, retriever: BaseRetriever, openai_key: str | None) -> Tuple[ConversationalRetrievalChain, Any]:
    """Assemble the ConversationalRetrievalChain and its memory.

    Two LLM roles: a low-temperature model condenses the follow-up question
    into a standalone query; a higher-temperature model writes the final
    answer from the retrieved context.

    Args:
        model: OpenAI chat model name used for both roles.
        retriever: Retriever supplying context documents.
        openai_key: OpenAI API key.

    Returns:
        (chain, memory) — memory returned separately so the UI can clear it
        without rebuilding the chain.
    """
    # Rewrites a follow-up into a standalone question, keeping its language.
    condense_question_prompt = PromptTemplate(
        input_variables=["chat_history", "question"],
        template=(
            "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n{chat_history}\n\nFollow Up Input: {question}\n\nStandalone question:"
        ),
    )
    answer_prompt = ChatPromptTemplate.from_template(answer_template(language="english"))
    memory = make_memory(model, openai_key)

    # Near-deterministic rewriting vs. more creative answering.
    standalone_llm = ChatOpenAI(api_key=openai_key, model=model, temperature=0.1)
    response_llm = ChatOpenAI(api_key=openai_key, model=model, temperature=0.5, top_p=0.95)

    chain = ConversationalRetrievalChain.from_llm(
        condense_question_prompt=condense_question_prompt,
        combine_docs_chain_kwargs={"prompt": answer_prompt},
        condense_question_llm=standalone_llm,
        llm=response_llm,
        memory=memory,
        retriever=retriever,
        chain_type="stuff",  # all retrieved docs are stuffed into one prompt
        verbose=False,
        return_source_documents=True,
    )
    return chain, memory
128
+
129
+
130
def setup_default_rag(openai_key: str, model_name: str = "gpt-4-turbo") -> Tuple[ConversationalRetrievalChain, Any]:
    """
    Sets up the RAG chain using the default hardcoded PDF file.
    This replaces the file upload functionality for the initial setup.

    Args:
        openai_key: OpenAI API key used for embeddings and both chat LLMs.
        model_name: Chat model name passed through to build_chain.

    Returns:
        (chain, memory) as produced by build_chain.

    Raises:
        FileNotFoundError: if the default PDF is missing (via load_default_pdf).
        ValueError: if openai_key is falsy (via select_embeddings).
    """

    vectorstore_path = LOCAL_VECTOR_STORE_DIR.joinpath(DEFAULT_VECTORSTORE_NAME)
    ensure_dir(vectorstore_path)

    embeddings = select_embeddings(openai_key)

    # Check if the vector store already exists locally (persistence logic)
    # An empty directory means no store has been built yet.
    if not any(vectorstore_path.iterdir()):
        # 1. Load and split the default PDF
        docs = load_default_pdf()
        chunks = split_documents(docs)

        # 2. Create and persist the Vector Store (Chroma)
        vs = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=vectorstore_path.as_posix()
        )
        # NOTE(review): Chroma.persist() is deprecated/a no-op in newer
        # langchain-community releases (persistence is automatic when
        # persist_directory is set) — confirm against the pinned version.
        vs.persist()
    else:
        # 3. Load the existing Vector Store
        vs = Chroma(embedding_function=embeddings, persist_directory=vectorstore_path.as_posix())

    # 4. Create Retriever
    base_retriever = vectorstore_backed_retriever(vs)
    retriever = make_compression_retriever(embeddings=embeddings, base_retriever=base_retriever)

    # 5. Build and return chain
    chain, memory = build_chain(model_name, retriever, openai_key)

    return chain, memory
166
+
167
+ # The process_uploaded_file function is removed as we are hardcoding the default file setup.
src/data/vector_stores/default_pdf_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf4518dece34cb61b6ed9a0d4d9e80ffbb5b27dbcb456599dd94c53b81a1501
3
+ size 667648
src/data/vector_stores/default_pdf_db/e1eec7e8-c14a-4d91-84f8-494ed1640f40/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6947c7600d0ae572da78c33e440a007be9b2bc4763c61e7f99e7d8695deede2
3
+ size 628400
src/data/vector_stores/default_pdf_db/e1eec7e8-c14a-4d91-84f8-494ed1640f40/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b081be2c2276a57e995075c7de2f3cb25e903798aac36d98042045533ab28f7d
3
+ size 100
src/data/vector_stores/default_pdf_db/e1eec7e8-c14a-4d91-84f8-494ed1640f40/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b043c771f5c6da7fd675c1557bded1b551f2019df55601e652bb22d83312bc9d
3
+ size 400
src/data/vector_stores/default_pdf_db/e1eec7e8-c14a-4d91-84f8-494ed1640f40/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
src/streamlit_app.py CHANGED
@@ -1,40 +1,149 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rag_streamlit_app.py
2
+
3
+ import streamlit as st
4
+ import os
5
+ import warnings
6
+ import re
7
+ from dotenv import load_dotenv
8
+ from chat_logic import setup_default_rag, OPENAI_KEY # Import core logic
9
+
10
+ # Suppress LangChain and other warnings for a clean Streamlit app
11
+ warnings.filterwarnings("ignore")
12
+ load_dotenv()
13
+
14
# --- Configuration ---
st.set_page_config(page_title="Ring App RAG Chatbot", layout="wide")

# --- Initialize Session State ---
# Streamlit reruns this script top-to-bottom on every interaction, so all
# cross-run state must live in st.session_state.
if 'chain' not in st.session_state:
    st.session_state.chain = None          # ConversationalRetrievalChain once initialized
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []     # list of {"role": ..., "content": ...} for display
if 'memory' not in st.session_state:
    st.session_state.memory = None         # LangChain memory paired with the chain
if 'openai_api_key' not in st.session_state:
    st.session_state.openai_api_key = OPENAI_KEY  # seeded from env; editable in sidebar
26
+
27
+
28
+ # --- Functions for UI Actions ---
29
+
30
def clear_chat_history():
    """Wipe both the LangChain memory buffer and the displayed transcript."""
    memory = st.session_state.memory
    if memory:
        memory.clear()
    st.session_state.chat_history = []
    st.toast("Chat history cleared!", icon="🧹")
36
+
37
def initialize_rag_system():
    """Initialize the RAG chain from the bundled default PDF.

    Side effects: sets st.session_state.chain / .memory, resets
    chat_history, and reports progress/errors through Streamlit widgets.
    """
    # Guard clause replaces the old redundant `elif not key` branch.
    if not st.session_state.openai_api_key:
        st.error("Please enter your OpenAI API Key in the sidebar.")
        return

    with st.spinner("Setting up the Ring App knowledge base..."):
        try:
            model = "gpt-4-turbo"

            # Build chain + memory from the hardcoded default document.
            chain, memory = setup_default_rag(st.session_state.openai_api_key, model)

            st.session_state.chain = chain
            st.session_state.memory = memory
            st.session_state.chat_history = []
            st.toast("Ring App knowledge base loaded and chatbot ready!", icon="✅")
        except FileNotFoundError as e:
            # FIX: the old message named 'default_rag_file.pdf', which is not
            # the file the app actually loads (Ring_App_Documentation.pdf).
            st.error(f"FATAL ERROR: {e}. Please ensure 'Ring_App_Documentation.pdf' is in the script directory.")
            st.session_state.chain = None
        except Exception as e:
            st.error(f"Error setting up RAG system: {e}")
            st.session_state.chain = None
            st.session_state.memory = None
60
+
61
+
62
def generate_response(prompt):
    """Invokes the RAG chain with the user's prompt.

    Returns a cleaned answer string and appends both turns to
    st.session_state.chat_history; returns an apology/instruction string on
    error or when the chain is not initialized.
    """
    if st.session_state.chain:
        try:
            # Invoke the chain
            response = st.session_state.chain.invoke({"question": prompt})
            answer = response.get("answer", "Sorry, I couldn't find an answer based only on the Ring App document.")

            # Clean response logic (order matters: strip structure first,
            # then collapse the whitespace those substitutions leave behind).
            # Collapse real newlines and literal "\n" sequences into spaces.
            answer = re.sub(r'\\n|\r|\n', ' ', answer)
            # Drop a trailing "Source(s): ..." citation if the model added one.
            answer = re.sub(r'(Sources?:\s*.+$)', '', answer, flags=re.IGNORECASE)
            # Remove bracketed/parenthesised asides. NOTE(review): this also
            # deletes legitimate parenthesised content — confirm intended.
            answer = re.sub(r'\[[^\]]*\]|\([^\)]*\)', '', answer)
            # Strip runs of markdown punctuation (emphasis, headers, quotes,
            # code ticks, strikethrough, dashes).
            answer = re.sub(r'[*_#>`~\-]{1,}', ' ', answer)
            # Collapse repeated whitespace.
            answer = re.sub(r'\s{2,}', ' ', answer).strip()

            # Update chat history state
            st.session_state.chat_history.append({"role": "user", "content": prompt})
            st.session_state.chat_history.append({"role": "assistant", "content": answer})

            return answer

        except Exception as e:
            st.error(f"An error occurred during the conversation: {e}")
            return "Sorry, there was an error processing your request."
    else:
        return "Please initialize the chatbot using the button in the sidebar."
88
+
89
+
90
# --- Streamlit UI Layout ---

st.title("Ring App Support Chatbot")
st.markdown("This RAG system is pre-loaded with knowledge about the **Ring Doorbell App**")

# Sidebar for configuration
with st.sidebar:
    st.header("Configuration")

    # API Key Input — pre-filled from the environment (OPENAI_API_KEY) when set.
    st.session_state.openai_api_key = st.text_input(
        "OpenAI API Key",
        value=st.session_state.openai_api_key,
        type="password",
        help="Required to use OpenAI embeddings and models."
    )

    st.markdown("---")

    # Initialization Button — builds (or loads) the vector store and chain.
    if st.button("Initialize Chatbot", type="primary"):
        initialize_rag_system()

    st.caption("The chatbot will only answer from the pre-loaded Ring App documentation.")

    st.markdown("---")

    # Reset Button
    if st.button("Clear History", help="Clears conversation memory and chat display."):
        clear_chat_history()

    # Check if the system is initialized and ready
    if st.session_state.chain:
        st.success("System Ready! Ask a question below.")


# --- Main Chat Interface ---

# Display chat messages from history (rebuilt from session state each rerun).
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Initial state prompt — st.stop() halts the script here on every rerun until
# the chain exists, so the chat input below never renders pre-initialization.
if not st.session_state.chain and not st.session_state.chat_history:
    st.info("Click **Initialize Chatbot** in the sidebar to load the default Ring App knowledge base.")
    st.stop()


# Chat input box
if prompt := st.chat_input("Ask a question about Ring App setup, dashboard, or history..."):
    # Immediately display user message
    with st.chat_message("user"):
        st.write(prompt)

    # Generate and display assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response_text = generate_response(prompt)
            st.write(response_text)