Spaces:

Prat0
/

ClarifyAI

Runtime error

App Files Community

Prat0 committed on Jul 7, 2024

Commit

23dedaa

verified ·

1 Parent(s): 008ee0e

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -29

app.py CHANGED Viewed

@@ -13,16 +13,29 @@ from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.readers.web import FireCrawlWebReader
 from llama_index.core import SummaryIndex
 import streamlit_analytics2 as streamlit_analytics
 # Setup functions
 def embed_setup():
-    Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
-    Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
 def qdrant_setup():
     client = qdrant_client.QdrantClient(
-      os.getenv('QDRANT_URL'),
-      api_key = os.getenv('QDRANT_API_KEY'),
     )
     return client
@@ -30,7 +43,7 @@ def llm_setup():
     llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
     return llm
-def query_index(index, similarity_top_k=3, streaming=True):
     memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
@@ -57,9 +70,10 @@ def query_index(index, similarity_top_k=3, streaming=True):
 def ingest_documents(url):
     firecrawl_reader = FireCrawlWebReader(
         api_key=os.getenv("FIRECRAWL_API_KEY"),
-        mode="crawl",
     )
     documents = firecrawl_reader.load_data(url=url)
     return documents
 # Streamlit app
@@ -67,7 +81,6 @@ st.title("Talk to Software Documentation")
 st.markdown("""
 This tool allows you to chat with software documentation. Here's how to use it:
 1. Enter the URL of the documentation you want to chat about.
 2. Click the "Ingest and Setup" button to crawl the documentation and set up the query engine.
 3. Once setup is complete, enter your query in the text box.
@@ -75,17 +88,6 @@ This tool allows you to chat with software documentation. Here's how to use it:
 5. View your chat history in the sidebar.
 """)
-# Initialize session state
-if 'chat_engine' not in st.query_params:
-    st.query_params['chat_engine'] = None
-if 'documents' not in st.query_params:
-    st.query_params['documents'] = None
-if 'chat_history' not in st.query_params:
-    st.query_params['chat_history'] = []
-if 'last_response' not in st.query_params:
-    st.query_params['last_response'] = None
 with streamlit_analytics.track():
     # URL input for document ingestion
     url = st.text_input("Enter URL to crawl and ingest documents:")
@@ -94,31 +96,44 @@ with streamlit_analytics.track():
     if st.button("Ingest and Setup"):
         if url:
             with st.spinner("Crawling, ingesting documents, and setting up query engine..."):
-                st.query_params['documents'] = ingest_documents(url)
                 embed_setup()
                 client = qdrant_setup()
                 llm = llm_setup()
                 vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
-                index = VectorStoreIndex.from_documents(st.query_params['documents'], vector_store=vector_store)
-                st.query_params['chat_engine'] = query_index(index)
             st.success(f"Documents ingested from {url} and query engine setup completed successfully!")
         else:
             st.error("Please enter a URL")
     # Query input
-    query = st.text_input("Enter your query:")
     # Search button
     if st.button("Search"):
-        if st.query_params['chat_engine'] is None:
             st.error("Please complete the setup first")
         elif query:
             with st.spinner("Searching..."):
-                response = st.query_params['chat_engine'].chat(query)
             # Add the query and response to chat history
-            st.query_params['chat_history'].append(("User", query))
-            st.query_params['chat_history'].append(("Assistant", str(response.response)))
             # Display the most recent response prominently
             st.subheader("Assistant's Response:")
@@ -128,10 +143,10 @@ with streamlit_analytics.track():
     # Sidebar for chat history
     st.sidebar.title("Chat History")
-    for role, message in st.query_params['chat_history']:
         st.sidebar.text(f"{role}: {message}")
     # Clear chat history button in sidebar
     if st.sidebar.button("Clear Chat History"):
-        st.query_params['chat_history'] = []
         st.sidebar.success("Chat history cleared!")

 from llama_index.readers.web import FireCrawlWebReader
 from llama_index.core import SummaryIndex
 import streamlit_analytics2 as streamlit_analytics
+import time
+# Initialize session state
+if 'setup_complete' not in st.session_state:
+    st.session_state['setup_complete'] = False
+if 'documents' not in st.session_state:
+    st.session_state['documents'] = None
+if 'chat_history' not in st.session_state:
+    st.session_state['chat_history'] = []
+if 'index' not in st.session_state:
+    st.session_state['index'] = None
+os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
 # Setup functions
 def embed_setup():
+    Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
+    Settings.llm = Gemini(temperature=0.1, model_name="models/gemini-pro")
 def qdrant_setup():
     client = qdrant_client.QdrantClient(
+      os.getenv("QDRANT_URL"),
+      api_key = os.getenv("QDRANT_API"),
     )
     return client
     llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
     return llm
+def query_index(index, streaming=True):
     memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
 def ingest_documents(url):
     firecrawl_reader = FireCrawlWebReader(
         api_key=os.getenv("FIRECRAWL_API_KEY"),
+        mode="scrape",
     )
     documents = firecrawl_reader.load_data(url=url)
+    print(type(documents[0]))
     return documents
 # Streamlit app
 st.markdown("""
 This tool allows you to chat with software documentation. Here's how to use it:
 1. Enter the URL of the documentation you want to chat about.
 2. Click the "Ingest and Setup" button to crawl the documentation and set up the query engine.
 3. Once setup is complete, enter your query in the text box.
 5. View your chat history in the sidebar.
 """)
 with streamlit_analytics.track():
     # URL input for document ingestion
     url = st.text_input("Enter URL to crawl and ingest documents:")
     if st.button("Ingest and Setup"):
         if url:
             with st.spinner("Crawling, ingesting documents, and setting up query engine..."):
+                st.session_state['documents'] = ingest_documents(url)
                 embed_setup()
                 client = qdrant_setup()
                 llm = llm_setup()
                 vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
+                index = VectorStoreIndex.from_documents(st.session_state['documents'], vector_store=vector_store)
+                st.session_state['index'] = index
+                st.session_state['setup_complete'] = True
             st.success(f"Documents ingested from {url} and query engine setup completed successfully!")
         else:
             st.error("Please enter a URL")
     # Query input
+    query = st.text_input("Enter your query:(please click on the search button, do not just press enter)")
     # Search button
     if st.button("Search"):
+        if not st.session_state['setup_complete']:
             st.error("Please complete the setup first")
         elif query:
             with st.spinner("Searching..."):
+                try:
+                    chat_engine = query_index(st.session_state['index'])
+                    response = chat_engine.chat(query)
+                except Exception as e:
+                    st.error(f"An error occurred: {str(e)}")
+                    st.info("Retrying in 120 seconds...")
+                    time.sleep(120)
+                    try:
+                        chat_engine = query_index(st.session_state['index'])
+                        response = chat_engine.chat(query)
+                    except Exception as e:
+                        st.error(f"Retry failed. Error: {str(e)}")
+                        st.stop()
             # Add the query and response to chat history
+            st.session_state['chat_history'].append(("User", query))
+            st.session_state['chat_history'].append(("Assistant", str(response.response)))
             # Display the most recent response prominently
             st.subheader("Assistant's Response:")
     # Sidebar for chat history
     st.sidebar.title("Chat History")
+    for role, message in st.session_state['chat_history']:
         st.sidebar.text(f"{role}: {message}")
     # Clear chat history button in sidebar
     if st.sidebar.button("Clear Chat History"):
+        st.session_state['chat_history'] = []
         st.sidebar.success("Chat history cleared!")