Prat0 committed on
Commit
23dedaa
·
verified ·
1 Parent(s): 008ee0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -29
app.py CHANGED
@@ -13,16 +13,29 @@ from llama_index.core.memory import ChatMemoryBuffer
13
  from llama_index.readers.web import FireCrawlWebReader
14
  from llama_index.core import SummaryIndex
15
  import streamlit_analytics2 as streamlit_analytics
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Setup functions
18
  def embed_setup():
19
- Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
20
- Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
21
 
22
  def qdrant_setup():
23
  client = qdrant_client.QdrantClient(
24
- os.getenv('QDRANT_URL'),
25
- api_key = os.getenv('QDRANT_API_KEY'),
26
  )
27
  return client
28
 
@@ -30,7 +43,7 @@ def llm_setup():
30
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
31
  return llm
32
 
33
- def query_index(index, similarity_top_k=3, streaming=True):
34
  memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
35
  chat_engine = index.as_chat_engine(
36
  chat_mode="context",
@@ -57,9 +70,10 @@ def query_index(index, similarity_top_k=3, streaming=True):
57
  def ingest_documents(url):
58
  firecrawl_reader = FireCrawlWebReader(
59
  api_key=os.getenv("FIRECRAWL_API_KEY"),
60
- mode="crawl",
61
  )
62
  documents = firecrawl_reader.load_data(url=url)
 
63
  return documents
64
 
65
  # Streamlit app
@@ -67,7 +81,6 @@ st.title("Talk to Software Documentation")
67
 
68
  st.markdown("""
69
  This tool allows you to chat with software documentation. Here's how to use it:
70
-
71
  1. Enter the URL of the documentation you want to chat about.
72
  2. Click the "Ingest and Setup" button to crawl the documentation and set up the query engine.
73
  3. Once setup is complete, enter your query in the text box.
@@ -75,17 +88,6 @@ This tool allows you to chat with software documentation. Here's how to use it:
75
  5. View your chat history in the sidebar.
76
  """)
77
 
78
- # Initialize session state
79
- if 'chat_engine' not in st.query_params:
80
- st.query_params['chat_engine'] = None
81
- if 'documents' not in st.query_params:
82
- st.query_params['documents'] = None
83
- if 'chat_history' not in st.query_params:
84
- st.query_params['chat_history'] = []
85
- if 'last_response' not in st.query_params:
86
- st.query_params['last_response'] = None
87
-
88
-
89
  with streamlit_analytics.track():
90
  # URL input for document ingestion
91
  url = st.text_input("Enter URL to crawl and ingest documents:")
@@ -94,31 +96,44 @@ with streamlit_analytics.track():
94
  if st.button("Ingest and Setup"):
95
  if url:
96
  with st.spinner("Crawling, ingesting documents, and setting up query engine..."):
97
- st.query_params['documents'] = ingest_documents(url)
98
  embed_setup()
99
  client = qdrant_setup()
100
  llm = llm_setup()
101
  vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
102
- index = VectorStoreIndex.from_documents(st.query_params['documents'], vector_store=vector_store)
103
- st.query_params['chat_engine'] = query_index(index)
 
104
  st.success(f"Documents ingested from {url} and query engine setup completed successfully!")
105
  else:
106
  st.error("Please enter a URL")
107
 
108
  # Query input
109
- query = st.text_input("Enter your query:")
110
 
111
  # Search button
112
  if st.button("Search"):
113
- if st.query_params['chat_engine'] is None:
114
  st.error("Please complete the setup first")
115
  elif query:
116
  with st.spinner("Searching..."):
117
- response = st.query_params['chat_engine'].chat(query)
118
-
 
 
 
 
 
 
 
 
 
 
 
 
119
  # Add the query and response to chat history
120
- st.query_params['chat_history'].append(("User", query))
121
- st.query_params['chat_history'].append(("Assistant", str(response.response)))
122
 
123
  # Display the most recent response prominently
124
  st.subheader("Assistant's Response:")
@@ -128,10 +143,10 @@ with streamlit_analytics.track():
128
 
129
  # Sidebar for chat history
130
  st.sidebar.title("Chat History")
131
- for role, message in st.query_params['chat_history']:
132
  st.sidebar.text(f"{role}: {message}")
133
 
134
  # Clear chat history button in sidebar
135
  if st.sidebar.button("Clear Chat History"):
136
- st.query_params['chat_history'] = []
137
  st.sidebar.success("Chat history cleared!")
 
13
  from llama_index.readers.web import FireCrawlWebReader
14
  from llama_index.core import SummaryIndex
15
  import streamlit_analytics2 as streamlit_analytics
16
+ import time
17
+
18
+ # Initialize session state
19
+ if 'setup_complete' not in st.session_state:
20
+ st.session_state['setup_complete'] = False
21
+ if 'documents' not in st.session_state:
22
+ st.session_state['documents'] = None
23
+ if 'chat_history' not in st.session_state:
24
+ st.session_state['chat_history'] = []
25
+ if 'index' not in st.session_state:
26
+ st.session_state['index'] = None
27
+
28
+ os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
29
 
30
  # Setup functions
31
  def embed_setup():
32
+ Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
33
+ Settings.llm = Gemini(temperature=0.1, model_name="models/gemini-pro")
34
 
35
  def qdrant_setup():
36
  client = qdrant_client.QdrantClient(
37
+ os.getenv("QDRANT_URL"),
38
+ api_key = os.getenv("QDRANT_API"),
39
  )
40
  return client
41
 
 
43
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
44
  return llm
45
 
46
+ def query_index(index, streaming=True):
47
  memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
48
  chat_engine = index.as_chat_engine(
49
  chat_mode="context",
 
70
  def ingest_documents(url):
71
  firecrawl_reader = FireCrawlWebReader(
72
  api_key=os.getenv("FIRECRAWL_API_KEY"),
73
+ mode="scrape",
74
  )
75
  documents = firecrawl_reader.load_data(url=url)
76
+ print(type(documents[0]))
77
  return documents
78
 
79
  # Streamlit app
 
81
 
82
  st.markdown("""
83
  This tool allows you to chat with software documentation. Here's how to use it:
 
84
  1. Enter the URL of the documentation you want to chat about.
85
  2. Click the "Ingest and Setup" button to crawl the documentation and set up the query engine.
86
  3. Once setup is complete, enter your query in the text box.
 
88
  5. View your chat history in the sidebar.
89
  """)
90
 
 
 
 
 
 
 
 
 
 
 
 
91
  with streamlit_analytics.track():
92
  # URL input for document ingestion
93
  url = st.text_input("Enter URL to crawl and ingest documents:")
 
96
  if st.button("Ingest and Setup"):
97
  if url:
98
  with st.spinner("Crawling, ingesting documents, and setting up query engine..."):
99
+ st.session_state['documents'] = ingest_documents(url)
100
  embed_setup()
101
  client = qdrant_setup()
102
  llm = llm_setup()
103
  vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
104
+ index = VectorStoreIndex.from_documents(st.session_state['documents'], vector_store=vector_store)
105
+ st.session_state['index'] = index
106
+ st.session_state['setup_complete'] = True
107
  st.success(f"Documents ingested from {url} and query engine setup completed successfully!")
108
  else:
109
  st.error("Please enter a URL")
110
 
111
  # Query input
112
+ query = st.text_input("Enter your query:(please click on the search button, do not just press enter)")
113
 
114
  # Search button
115
  if st.button("Search"):
116
+ if not st.session_state['setup_complete']:
117
  st.error("Please complete the setup first")
118
  elif query:
119
  with st.spinner("Searching..."):
120
+ try:
121
+ chat_engine = query_index(st.session_state['index'])
122
+ response = chat_engine.chat(query)
123
+ except Exception as e:
124
+ st.error(f"An error occurred: {str(e)}")
125
+ st.info("Retrying in 120 seconds...")
126
+ time.sleep(120)
127
+ try:
128
+ chat_engine = query_index(st.session_state['index'])
129
+ response = chat_engine.chat(query)
130
+ except Exception as e:
131
+ st.error(f"Retry failed. Error: {str(e)}")
132
+ st.stop()
133
+
134
  # Add the query and response to chat history
135
+ st.session_state['chat_history'].append(("User", query))
136
+ st.session_state['chat_history'].append(("Assistant", str(response.response)))
137
 
138
  # Display the most recent response prominently
139
  st.subheader("Assistant's Response:")
 
143
 
144
  # Sidebar for chat history
145
  st.sidebar.title("Chat History")
146
+ for role, message in st.session_state['chat_history']:
147
  st.sidebar.text(f"{role}: {message}")
148
 
149
  # Clear chat history button in sidebar
150
  if st.sidebar.button("Clear Chat History"):
151
+ st.session_state['chat_history'] = []
152
  st.sidebar.success("Chat history cleared!")