binh99 commited on
Commit
a4b89be
·
1 Parent(s): f7b536f

update cosmos db

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +0 -10
  2. README.md +5 -3
  3. STANDARD_SOFTWARE LIFECYCLES.pdf +0 -0
  4. __pycache__/callback.cpython-39.pyc +0 -0
  5. __pycache__/config.cpython-39.pyc +0 -0
  6. __pycache__/utils.cpython-39.pyc +0 -0
  7. __pycache__/vector_db.cpython-39.pyc +0 -0
  8. app.py +201 -109
  9. auth.json +14 -0
  10. chains/__pycache__/azure_openai.cpython-39.pyc +0 -0
  11. chains/__pycache__/create_topic.cpython-39.pyc +0 -0
  12. chains/__pycache__/custom_chain.cpython-39.pyc +0 -0
  13. chains/__pycache__/decision_maker.cpython-39.pyc +0 -0
  14. chains/__pycache__/model.cpython-39.pyc +0 -0
  15. chains/__pycache__/multi_queries.cpython-39.pyc +0 -0
  16. chains/__pycache__/openai_model.cpython-39.pyc +0 -0
  17. chains/__pycache__/related_question.cpython-39.pyc +0 -0
  18. chains/__pycache__/simple_chain.cpython-39.pyc +0 -0
  19. chains/__pycache__/stage_analyzer.cpython-39.pyc +0 -0
  20. chains/__pycache__/summary.cpython-39.pyc +0 -0
  21. chains/__pycache__/web_search.cpython-39.pyc +0 -0
  22. chains/create_topic.py +26 -0
  23. chains/custom_chain.py +4 -6
  24. chains/decision_maker.py +29 -0
  25. chains/openai_model.py +172 -152
  26. chains/qaibot_chain.py +81 -0
  27. chains/related_question.py +35 -0
  28. chains/simple_chain.py +22 -0
  29. chains/summary.py +27 -9
  30. chains/web_search.py +5 -8
  31. config.py +26 -12
  32. cosmos_db.py +73 -0
  33. custom.css +1026 -0
  34. custom_vectordb.py +421 -0
  35. data.json +0 -0
  36. geckodriver.log +0 -0
  37. history/binh/2023-08-06_17-10-17/Assistance Inquiry.json +1 -0
  38. html_parser.py +0 -116
  39. logo.png +0 -0
  40. process_fb.py +0 -55
  41. process_html.py +0 -58
  42. prompts/__pycache__/condense_llm.cpython-39.pyc +0 -0
  43. prompts/__pycache__/create_topic.cpython-39.pyc +0 -0
  44. prompts/__pycache__/custom_chain.cpython-39.pyc +0 -0
  45. prompts/__pycache__/decision_maker.cpython-39.pyc +0 -0
  46. prompts/__pycache__/llm.cpython-39.pyc +0 -0
  47. prompts/__pycache__/multi_queries.cpython-39.pyc +0 -0
  48. prompts/__pycache__/related_question.cpython-39.pyc +0 -0
  49. prompts/__pycache__/simple_chain.cpython-39.pyc +0 -0
  50. prompts/__pycache__/stage_analyzer.cpython-39.pyc +0 -0
Dockerfile DELETED
@@ -1,10 +0,0 @@
1
- FROM python:3.10
2
-
3
- WORKDIR /usr/src/app
4
-
5
- COPY requirements.txt ./
6
- RUN pip install --no-cache-dir -r requirements.txt
7
-
8
- COPY . .
9
-
10
- CMD [ "python", "app.py" ]
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: Bot Recommendation
3
- emoji: 🌖
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.34.0
8
  python_version: 3.9.13
9
  app_file: app.py
10
  pinned: false
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: 🤖FPT.QAI AI Assistant
3
+ emoji: 🤖
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 3.39.0
8
  python_version: 3.9.13
9
  app_file: app.py
10
  pinned: false
11
+ fullWidth: true
12
+
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
STANDARD_SOFTWARE LIFECYCLES.pdf ADDED
Binary file (864 kB). View file
 
__pycache__/callback.cpython-39.pyc DELETED
Binary file (1.15 kB)
 
__pycache__/config.cpython-39.pyc DELETED
Binary file (1.18 kB)
 
__pycache__/utils.cpython-39.pyc DELETED
Binary file (4.06 kB)
 
__pycache__/vector_db.cpython-39.pyc DELETED
Binary file (4.58 kB)
 
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
 
3
  from utils import *
4
  from chains.openai_model import OpenAIModel
5
- from config import SEVER, PORT, DEBUG, DEPLOYMENT_ID, SAVE_DIR
6
- from vector_db import delete_all, delete_file, handle_upload_file, update_file
 
 
7
 
8
  # Get and load new model
9
  def get_model(llm_model_name, temperature=0., top_p=1.0):
@@ -15,126 +17,197 @@ def get_model(llm_model_name, temperature=0., top_p=1.0):
15
  def create_new_model():
16
  return get_model(llm_model_name=DEPLOYMENT_ID)
17
 
 
18
  def update_database(files_src):
19
  message = handle_upload_file(files_src)
20
- saved_file = os.listdir(SAVE_DIR)
21
- return gr.update(choices=saved_file), message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Gradio app
24
- title = """<h1 align="left" style="min-width:200px; margin-top:6px; white-space: nowrap;">Docs FPT 🤖</h1>"""
25
- with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  user_name = gr.State("")
27
  history = gr.State([])
28
  current_model = gr.State(create_new_model)
 
29
 
30
- with gr.Row():
31
- with gr.Column(scale=1):
32
- gr.HTML(title)
 
 
33
  status_text = ""
34
  status_display = gr.Markdown(status_text, elem_id="status_display")
35
 
36
  with gr.Row().style(equal_height=True):
37
- with gr.Column(scale=5):
38
- with gr.Row():
39
- chatbot = gr.Chatbot([], elem_id="chatbot").style(height="100%")
40
- with gr.Row():
41
- with gr.Column(min_width=225, scale=12):
42
- user_input = gr.Textbox(
43
- show_label=False, placeholder="Enter here"
44
- ).style(container=False)
45
- # ask_examples_hidden = gr.Textbox(elem_id="hidden-message")
46
- examples_questions = gr.Examples(
47
- [
48
- "Bagaimana cara saya memohon sewa gerai?",
49
- "Bagaimana cara saya pergi dari Komtar ke Pengkalan Weld?",
50
- "Bagaimana cara saya boleh kemaskini Alamat Surat Menyurat Cukai Taksiran",
51
- "What is event's permit at Penang?",
52
- "How to apply car parking at Penang?",
53
- "Where can I request for my event’s permit in Penang?"
54
- ],
55
- [user_input],
56
- examples_per_page=6,
57
  )
58
- with gr.Column(min_width=42, scale=1):
59
- submitBtn = gr.Button("Send", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  with gr.Row():
61
- emptyBtn = gr.Button(
62
- "🧹 New conversation", elem_id="empty_btn")
63
- retryBtn = gr.Button("🔄 Retry")
64
- rec = gr.Button("⏺️Record")
65
- record_audio = gr.inputs.Audio(source="microphone", type="filepath")
66
  with gr.Row():
67
- gr.Markdown(
68
- """
69
- ## 💻 Key Feature
70
- - Chat with an AI chatbot powered by OpenAI's chat API, using the **content of your research document**.
71
- - Get **semantic search** answers from your document using **vector databases**.
72
- - Perform a **Google search** within the app
73
- - **Verify sources** for all generated results.
74
- - Support converting **speech to text** for easy input.
75
- ### Pine cone
76
- Pinecone makes it easy to provide long-term memory for high-performance AI applications.
77
- It's a managed, cloud-native vector database with a simple API and no infrastructure hassles. Pinecone serves fresh, filtered query results with low latency at the scale of billions of vectors.
78
- https://www.pinecone.io/blog/azure/
79
- ### Azure OpenAI Service
80
- https://learn.microsoft.com/en-us/legal/cognitive-services/openai/data-privacy
81
- ## 📧 Contact
82
- This tool has been developed by the R&D lab at **QAI** (FPT Software, Ha Noi, Viet Nam)
83
- If you have any questions or feature requests, please feel free to reach us out at <b>khangnvt1@fpt.com</b>.
84
- """
85
- )
86
 
87
- with gr.Column(min_width=50, scale=1.5):
88
- with gr.Tab(label="ChatGPT"):
89
- # gr.Markdown(f'<p style="text-align:center">Azure OpenAI Service:<a '
90
- # f'href="https://learn.microsoft.com/en-us/legal/cognitive-services/openai/data-privacy">here</a></p>')
91
- index_files = gr.Files(label="Files", type="file", multiple=True)
92
- use_websearch = gr.Checkbox(label="Google search", value=False, elem_classes="switch_checkbox")
93
- custom_websearch = gr.Checkbox(label="Custom web search", value=False, elem_classes="switch_checkbox")
94
-
95
- with gr.Tab(label="Configuration"):
96
- gr.Markdown(
97
- "⚠️Be careful to change ⚠️\n\nIf you can't use it, please restore the default settings")
98
- with gr.Accordion("Parameter", open=False):
99
- temperature_slider = gr.Slider(
100
- minimum=-0,
101
- maximum=1.0,
102
- value=0.0,
103
- step=0.1,
104
- interactive=True,
105
- label="Temperature",
106
- )
107
- top_p_slider = gr.Slider(
108
- minimum=-0,
109
- maximum=1.0,
110
- value=1.0,
111
- step=0.1,
112
- interactive=True,
113
- label="Top_p",
114
- )
115
- user_identifier = gr.Textbox(
116
- show_label=True,
117
- placeholder="Enter here",
118
- label="User name",
119
- value=user_name.value,
120
- lines=1,
121
- )
122
- loadHistoryBtn = gr.Button("💾 Load History")
123
- with gr.Tab(label="Knowledge DB"):
124
- all_files = gr.Dropdown(
125
- label="All available files:", multiselect=True, choices=os.listdir(SAVE_DIR), interactive=True
126
- )
127
- with gr.Column():
128
- delete_btn = gr.Button("🗑️ Delete")
129
- with gr.Column():
130
- delete_all_btn = gr.Button("🗑️ Delete all")
131
- update_btn = gr.Button("🗑️ Update DB")
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  index_files.change(update_database, [index_files], [all_files, status_display])
 
134
  delete_all_btn.click(delete_all, None, [all_files, status_display, index_files])
135
  delete_btn.click(delete_file, [all_files], [all_files, status_display, index_files])
136
- update_btn.click(update_file, None, [status_display])
137
-
138
  emptyBtn.click(
139
  reset,
140
  inputs=[current_model],
@@ -142,18 +215,37 @@ with gr.Blocks() as demo:
142
  show_progress=True,
143
  )
144
 
145
- retryBtn.click(retry, [chatbot, current_model, use_websearch, custom_websearch], [chatbot])
146
 
 
 
 
 
147
 
148
- loadHistoryBtn.click(load_chat_history, [current_model], [chatbot])
 
 
 
 
 
 
 
 
 
149
 
150
- rec.click(transcribe, [current_model, record_audio], [user_input])
151
 
152
- user_identifier.change(set_user_indentifier, [current_model, user_identifier], None)
153
 
154
- user_input.submit(predict, [chatbot, current_model, user_input, use_websearch, custom_websearch], [chatbot, status_display], show_progress=True)
 
 
 
155
  user_input.submit(lambda: "", None, user_input)
156
- submitBtn.click(predict, [chatbot, current_model, user_input, use_websearch, custom_websearch], [chatbot, status_display], show_progress=True)
 
 
 
157
  submitBtn.click(lambda: "", None, user_input)
158
  demo.queue(concurrency_count=10).launch(
159
- server_name=SEVER, server_port=PORT, debug=DEBUG)
 
2
 
3
  from utils import *
4
  from chains.openai_model import OpenAIModel
5
+ from config import SEVER, PORT, DEBUG, DEPLOYMENT_ID
6
+ from vector_db import delete_all, delete_file, handle_upload_file, load_files_blob
7
+ from theme_dropdown import create_theme_dropdown
8
+
9
 
10
  # Get and load new model
11
  def get_model(llm_model_name, temperature=0., top_p=1.0):
 
17
  def create_new_model():
18
  return get_model(llm_model_name=DEPLOYMENT_ID)
19
 
20
+
21
  def update_database(files_src):
22
  message = handle_upload_file(files_src)
23
+ available_files = load_files_blob()
24
+ return gr.update(choices=available_files), message
25
+
26
+
27
+ def get_available_files():
28
+ available_files = load_files_blob()
29
+ return gr.update(choices=available_files), gr.update(visible=True)
30
+
31
+
32
+ def update_example(chatbot, set_save_file_name):
33
+ from chains.related_question import RelatedQuestion
34
+ from chains.create_topic import CreateTopic
35
+ related_question = RelatedQuestion()
36
+ outputs = chatbot[-1][1].split("<div")[0]
37
+ res = related_question.predict(inputs=chatbot[-1][0], outputs=outputs)
38
+ out = list(map(lambda x: x.split('- ')[-1], res.split('\n')))
39
+ samples = [[a] for a in out]
40
+ if len(chatbot) == 1:
41
+ topic_chain = CreateTopic()
42
+ topic = topic_chain.predict(inputs=chatbot[-1][0], outputs=outputs)
43
+ set_save_file_name = topic
44
+ return chatbot, gr.Dataset.update(samples=samples), samples, set_save_file_name
45
+
46
+
47
+ def load_example(example_id, samples):
48
+ return samples[example_id][0]
49
+
50
 
51
  # Gradio app
52
+ with open("custom.css", "r", encoding="utf-8") as f:
53
+ customCSS = f.read()
54
+ dropdown, js = create_theme_dropdown()
55
+
56
+ head = """
57
+ <html lang="en">
58
+ <head>
59
+ <meta charset="utf-8">
60
+ <meta name="viewport" content="width=device-width, initial-scale=1">
61
+ <title>FPT Bot</title>
62
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-4bw+/aepP/YC94hEpVNVgiZdgIC5+VKNBQNGCHeKRQN+PtmoHDEXuppvnDJzQIu9" crossorigin="anonymous">
63
+ </head>
64
+ <body>
65
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/js/bootstrap.bundle.min.js" integrity="sha384-HwwvtgBNo3bZJJLYd8oVXjrBZt8cqVSpeBNS5n7C8IVInixGAoxmnlMuBnhbgrkm" crossorigin="anonymous"></script>
66
+ </body>
67
+ </html>
68
+ """
69
+ checkbox_js = """
70
+ async () => {
71
+ // Select all checkboxes with the class 'svelte-1ojmf70'
72
+ const checkboxes = document.querySelectorAll('.svelte-1ojmf70[type="checkbox"]');
73
+
74
+ // Add a click event listener to each checkbox
75
+ checkboxes.forEach(checkbox => {
76
+ checkbox.addEventListener('click', function() {
77
+ // If this checkbox was checked, uncheck all others
78
+ if (this.checked) {
79
+ checkboxes.forEach(otherCheckbox => {
80
+ if (otherCheckbox !== this) {
81
+ otherCheckbox.checked = false;
82
+ }
83
+ });
84
+ }
85
+ });
86
+ });
87
+ }
88
+ """
89
+
90
+ title = """<h1 align="left" style="min-width:200px; margin-top:6px; white-space: nowrap;">AI Assistant 🤖</h1>"""
91
+ logo = """
92
+ <div class="logo"></div>
93
+ """
94
+ user_input = gr.Textbox()
95
+
96
+ with gr.Blocks(css=customCSS, theme='minatosnow/qaigpt') as demo:
97
+ samples = gr.State()
98
  user_name = gr.State("")
99
  history = gr.State([])
100
  current_model = gr.State(create_new_model)
101
+ gr.HTML(head)
102
 
103
+ with gr.Row(elem_classes="status-div"):
104
+ with gr.Column():
105
+ gr.HTML(logo)
106
+ user_info = gr.Markdown(value="getting user info...", elem_id="user_info")
107
+ with gr.Column():
108
  status_text = ""
109
  status_display = gr.Markdown(status_text, elem_id="status_display")
110
 
111
  with gr.Row().style(equal_height=True):
112
+ with gr.Column(scale=1):
113
+ with gr.Tab(label="Database"):
114
+ with gr.Accordion("Upload file", open=True, visible=False) as acc:
115
+ with gr.Row():
116
+ index_files = gr.Files(label="Files", type="file")
117
+
118
+ all_files = gr.Dropdown(
119
+ label=None, show_label=False, multiselect=True, choices=load_files_blob(), interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
120
  )
121
+ with gr.Row():
122
+ with gr.Column(min_width=42, scale=1):
123
+ delete_btn = gr.Button("", elem_classes="btn btn-del tooltip-btn tooltip-del")
124
+ with gr.Column(min_width=42, scale=1):
125
+ delete_all_btn = gr.Button("", elem_classes="btn btn-del-all tooltip-btn tooltip-del-all")
126
+ upload_files_btn = gr.Checkbox(label="Upload files", value=False, elem_classes="switch_checkbox")
127
+ local_db = gr.Checkbox(label="Local knowledge DB", value=False, elem_classes="switch_checkbox")
128
+ custom_websearch = gr.Checkbox(label="FPT web search", value=False, elem_classes="switch_checkbox")
129
+ local_db.change(None, _js=checkbox_js)
130
+ upload_files_btn.change(None, _js=checkbox_js)
131
+ custom_websearch.change(None, _js=checkbox_js)
132
+
133
+ with gr.Tab(label="History"):
134
+ with gr.Accordion("Save/Load conversation history"):
135
+ with gr.Column():
136
+ with gr.Row():
137
+ with gr.Column(scale=6):
138
+ history_file_dropdown = gr.Dropdown(
139
+ label="Load conversation from list",
140
+ choices=get_history_names(plain=True),
141
+ multiselect=False,
142
+ container=False,
143
+ )
144
+ with gr.Row():
145
+ with gr.Column(min_width=42, scale=1):
146
+ historyRefreshBtn = gr.Button("🔄 Refresh")
147
+ with gr.Column(min_width=42, scale=1):
148
+ historyDeleteBtn = gr.Button("🗑️ Delete")
149
+ with gr.Row():
150
+ with gr.Column(scale=6):
151
+ set_save_file_name = gr.Textbox(
152
+ show_label=True,
153
+ placeholder=None,
154
+ label="Topic (File name)",
155
+ )
156
+ with gr.Column(scale=1):
157
+ saveHistoryBtn = gr.Button("💾 Save History")
158
+
159
+ with gr.Tab(label="Theme"):
160
+ toggle_dark = gr.Button(value="Toggle Light/Dark")
161
+ toggle_dark.click(
162
+ None,
163
+ _js="""
164
+ () => {
165
+ document.body.classList.toggle('dark');
166
+ }
167
+ """,
168
+ )
169
+ with gr.Column(scale=9):
170
  with gr.Row():
171
+ chatbot = gr.Chatbot(show_label=False, elem_classes="chatbot", show_share_button=False, height=650)
 
 
 
 
172
  with gr.Row():
173
+ examples_questions = gr.Dataset(samples=[], components=[user_input], type="index")
174
+ with gr.Row(elem_classes="chatrow"):
175
+ with gr.Column(min_width=225, scale=10):
176
+ user_input = gr.Textbox(show_label=False, placeholder="Ask me anything...", container=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
+ with gr.Column(min_width=42, scale=1):
179
+ submitBtn = gr.Button("", elem_classes="btn btn-send tooltip-btn tooltip-content-send")
180
+ with gr.Column(min_width=42, scale=1):
181
+ record_audio = gr.Audio(source="microphone",
182
+ show_label=False,
183
+ elem_classes="audio-btn btn",
184
+ type="filepath")
185
+ with gr.Column(min_width=42, scale=1):
186
+ emptyBtn = gr.Button(
187
+ "", elem_classes="btn btn-clear tooltip-btn tooltip-content-clear")
188
+
189
+ with gr.Row(elem_classes="footer"):
190
+ gr.HTML("""<footer>🤖 QGPT – Developed by FPT.QAI</footer>""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
+
193
+ def create_greeting(request: gr.Request):
194
+ if hasattr(request, "username") and request.username: # is not None or is not ""
195
+ print(f"User Name: {request.username}")
196
+ user_info, user_name = gr.Markdown.update(value=f"Hi {request.username}!"), request.username
197
+ else:
198
+ user_info, user_name = gr.Markdown.update(value="", visible=False), ""
199
+ current_model = get_model(llm_model_name=DEPLOYMENT_ID)
200
+ current_model.set_user_identifier(user_name)
201
+ return user_info, user_name, current_model, get_history_names(False, user_name)
202
+ demo.load(create_greeting, inputs=None, outputs=[user_info, user_name, current_model, history_file_dropdown])
203
+
204
+ examples_questions.click(load_example, inputs=[examples_questions, samples], outputs=[user_input])
205
  index_files.change(update_database, [index_files], [all_files, status_display])
206
+ upload_files_btn.change(get_available_files, None, [all_files, acc])
207
  delete_all_btn.click(delete_all, None, [all_files, status_display, index_files])
208
  delete_btn.click(delete_file, [all_files], [all_files, status_display, index_files])
209
+ # update_btn.click(update_fb, None, [status_display])
210
+
211
  emptyBtn.click(
212
  reset,
213
  inputs=[current_model],
 
215
  show_progress=True,
216
  )
217
 
218
+ # retryBtn.click(retry, [chatbot, current_model, use_websearch, custom_websearch], [chatbot, status_display])
219
 
220
+ saveHistoryBtn.click(save_chat_history, [current_model, chatbot, set_save_file_name], [status_display])
221
+ historyRefreshBtn.click(get_history_names, [gr.State(False), user_name], [history_file_dropdown])
222
+ historyDeleteBtn.click(delete_chat_history, [current_model, history_file_dropdown], [status_display, history_file_dropdown, chatbot])
223
+ history_file_dropdown.change(load_chat_history, [current_model, history_file_dropdown], [set_save_file_name, chatbot])
224
 
225
+ record_audio.start_recording(None, None, None,
226
+ _js="""
227
+ async () => {
228
+ document.querySelectorAll('.sm.secondary').forEach(function(element) {
229
+ element.classList.remove('secondary');
230
+ element.classList.add('tertiary');
231
+ });
232
+ }
233
+ """
234
+ )
235
 
236
+ record_audio.stop_recording(transcribe, [current_model, record_audio], [user_input, record_audio])
237
 
238
+ # user_identifier.change(set_user_identifier, [current_model, user_identifier], None)
239
 
240
+ user_input.submit(predict, [chatbot, current_model, user_input, upload_files_btn, custom_websearch, local_db],
241
+ [chatbot, status_display], show_progress=True).then(update_example, [chatbot, set_save_file_name],
242
+ [chatbot, examples_questions, samples,
243
+ set_save_file_name])
244
  user_input.submit(lambda: "", None, user_input)
245
+ submitBtn.click(predict, [chatbot, current_model, user_input, upload_files_btn, custom_websearch, local_db],
246
+ [chatbot, status_display], show_progress=True).then(update_example, [chatbot, set_save_file_name],
247
+ [chatbot, examples_questions, samples,
248
+ set_save_file_name])
249
  submitBtn.click(lambda: "", None, user_input)
250
  demo.queue(concurrency_count=10).launch(
251
+ server_name=SEVER, server_port=PORT, auth=get_auth(), debug=DEBUG)
auth.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "user1": {
3
+ "username": "binh",
4
+ "password": "123456"
5
+ },
6
+ "user2": {
7
+ "username": "hung",
8
+ "password": "123456"
9
+ },
10
+ "user3": {
11
+ "username": "khang",
12
+ "password": "123456"
13
+ }
14
+ }
chains/__pycache__/azure_openai.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/azure_openai.cpython-39.pyc and b/chains/__pycache__/azure_openai.cpython-39.pyc differ
 
chains/__pycache__/create_topic.cpython-39.pyc ADDED
Binary file (1.25 kB). View file
 
chains/__pycache__/custom_chain.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/custom_chain.cpython-39.pyc and b/chains/__pycache__/custom_chain.cpython-39.pyc differ
 
chains/__pycache__/decision_maker.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/decision_maker.cpython-39.pyc and b/chains/__pycache__/decision_maker.cpython-39.pyc differ
 
chains/__pycache__/model.cpython-39.pyc DELETED
Binary file (5.7 kB)
 
chains/__pycache__/multi_queries.cpython-39.pyc DELETED
Binary file (1.43 kB)
 
chains/__pycache__/openai_model.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/openai_model.cpython-39.pyc and b/chains/__pycache__/openai_model.cpython-39.pyc differ
 
chains/__pycache__/related_question.cpython-39.pyc ADDED
Binary file (1.69 kB). View file
 
chains/__pycache__/simple_chain.cpython-39.pyc ADDED
Binary file (1.1 kB). View file
 
chains/__pycache__/stage_analyzer.cpython-39.pyc DELETED
Binary file (1.23 kB)
 
chains/__pycache__/summary.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/summary.cpython-39.pyc and b/chains/__pycache__/summary.cpython-39.pyc differ
 
chains/__pycache__/web_search.cpython-39.pyc CHANGED
Binary files a/chains/__pycache__/web_search.cpython-39.pyc and b/chains/__pycache__/web_search.cpython-39.pyc differ
 
chains/create_topic.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.llm import LLMChain
2
+ from langchain.prompts.chat import (
3
+ ChatPromptTemplate,
4
+ SystemMessagePromptTemplate,
5
+ HumanMessagePromptTemplate)
6
+ from prompts.create_topic import SYSTEM_PROMPT_TEMPLATE, HUMAN_PROMPT_TEMPLATE
7
+ from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID
8
+ from chains.azure_openai import CustomAzureOpenAI
9
+
10
+
11
+ class CreateTopic(LLMChain):
12
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
13
+ openai_api_type=OPENAI_API_TYPE,
14
+ openai_api_base=OPENAI_API_BASE,
15
+ openai_api_version=OPENAI_API_VERSION,
16
+ openai_api_key=OPENAI_API_KEY,
17
+ temperature=0.0)
18
+ prompt = ChatPromptTemplate.from_messages(
19
+ [
20
+ SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT_TEMPLATE),
21
+ HumanMessagePromptTemplate.from_template(HUMAN_PROMPT_TEMPLATE)
22
+ ])
23
+ if __name__ == "__main__":
24
+ chain = CreateTopic()
25
+ out = chain.predict(inputs="Hello", outputs="Hello! how can I assis you today?")
26
+ print(out)
chains/custom_chain.py CHANGED
@@ -11,7 +11,6 @@ from config import DEPLOYMENT_ID
11
  from prompts.custom_chain import SYSTEM_PROMPT_TEMPLATE, HUMAN_PROMPT_TEMPLATE
12
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE
13
  from chains.azure_openai import CustomAzureOpenAI
14
- import os
15
 
16
  class MultiQueriesChain(LLMChain):
17
  llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
@@ -41,12 +40,11 @@ class CustomConversationalRetrievalChain(ConversationalRetrievalChain):
41
  docs = self.retriever.get_relevant_documents(
42
  question
43
  )
 
44
  for (idx, d) in enumerate(docs):
45
- if "https:" in d.metadata["source"]:
46
- item = [d.page_content.strip("�"), d.metadata["source"]]
47
- else:
48
- item = [d.page_content.strip("�"), os.path.basename(d.metadata["source"])]
49
- d.page_content = f'[{idx+1}]\t "{item[0]}"\nSource: {item[1]}'
50
  return self._reduce_tokens_below_limit(docs)
51
  # def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]:
52
  # results = llm_chain.predict(question=question) + "\n"
 
11
  from prompts.custom_chain import SYSTEM_PROMPT_TEMPLATE, HUMAN_PROMPT_TEMPLATE
12
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE
13
  from chains.azure_openai import CustomAzureOpenAI
 
14
 
15
  class MultiQueriesChain(LLMChain):
16
  llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
 
40
  docs = self.retriever.get_relevant_documents(
41
  question
42
  )
43
+ # Add attribute to docs call docs.citation
44
  for (idx, d) in enumerate(docs):
45
+ item = [d.page_content.strip(""), d.metadata["source"]]
46
+ d.page_content = f'[{idx+1}] {item[0]}'
47
+ d.metadata["source"] = f'{item[1]}'
 
 
48
  return self._reduce_tokens_below_limit(docs)
49
  # def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]:
50
  # results = llm_chain.predict(question=question) + "\n"
chains/decision_maker.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.llm import LLMChain
2
+ from langchain.prompts.chat import (
3
+ ChatPromptTemplate,
4
+ SystemMessagePromptTemplate,
5
+ HumanMessagePromptTemplate)
6
+ from prompts.decision_maker import SYSTEM_PROMPT_TEMPLATE, HUMAN_PROMPT_TEMPLATE
7
+ from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID
8
+ from chains.azure_openai import CustomAzureOpenAI
9
+
10
+ class DecisionMaker(LLMChain):
11
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
12
+ openai_api_type=OPENAI_API_TYPE,
13
+ openai_api_base=OPENAI_API_BASE,
14
+ openai_api_version=OPENAI_API_VERSION,
15
+ openai_api_key=OPENAI_API_KEY,
16
+ temperature=0.0)
17
+ prompt = ChatPromptTemplate.from_messages(
18
+ [
19
+ SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT_TEMPLATE),
20
+ HumanMessagePromptTemplate.from_template(HUMAN_PROMPT_TEMPLATE)
21
+ ])
22
+
23
+
24
+ if __name__ == "__main__":
25
+ rel = DecisionMaker()
26
+ query = "Ai là tổng thống Mỹ"
27
+
28
+ res = rel.predict(query = query)
29
+ print(res)
chains/openai_model.py CHANGED
@@ -1,34 +1,36 @@
1
  import json
2
  import os
 
 
3
  import openai
4
 
5
  from langchain.prompts import PromptTemplate
6
- from config import TIMEOUT_STREAM
7
  from vector_db import upload_file
8
  from callback import StreamingGradioCallbackHandler
9
  from queue import SimpleQueue, Empty, Queue
10
  from threading import Thread
11
- from utils import history_file_path, load_lasted_file_username, add_source_numbers, add_details
12
  from chains.custom_chain import CustomConversationalRetrievalChain
13
  from langchain.chains import LLMChain
14
  from chains.azure_openai import CustomAzureOpenAI
15
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, API_KEY, \
16
- DEPLOYMENT_ID, MODEL_ID, EMBEDDING_API_KEY, EMBEDDING_API_BASE
17
-
18
 
19
  class OpenAIModel:
20
  def __init__(
21
- self,
22
- llm_model_name,
23
- condense_model_name,
24
- prompt_template="",
25
- temperature=0.0,
26
- top_p=1.0,
27
- n_choices=1,
28
- stop = None,
29
- presence_penalty=0,
30
- frequency_penalty=0,
31
- user = None
32
  ):
33
  self.llm_model_name = llm_model_name
34
  self.condense_model_name = condense_model_name
@@ -43,13 +45,14 @@ class OpenAIModel:
43
  self.history = []
44
  self.user_identifier = user
45
 
46
- def set_user_indentifier(self, new_user_indentifier):
47
- self.user_identifier = new_user_indentifier
48
 
49
  def format_prompt(self, qa_prompt_template, condense_prompt_template):
50
  # Prompt template langchain
51
  qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=["question", "chat_history", "context"])
52
- condense_prompt = PromptTemplate(template=condense_prompt_template, input_variables=["question", "chat_history"])
 
53
  return qa_prompt, condense_prompt
54
 
55
  def memory(self, inputs, outputs, last_k=3):
@@ -65,166 +68,85 @@ class OpenAIModel:
65
  def delete_first_conversation(self):
66
  if self.history:
67
  self.history.pop(0)
68
-
69
  def delete_last_conversation(self):
70
  if len(self.history) > 0:
71
  self.history.pop()
72
-
73
- def auto_save_history(self, chatbot):
74
- if self.user_identifier is not None:
75
- file_path = history_file_path(self.user_identifier)
76
- json_s = {"history": self.history, "chatbot": chatbot}
77
- with open(file_path, "w", encoding='utf-8') as f:
78
- json.dump(json_s, f, ensure_ascii=False)
79
-
80
- def load_history(self):
81
- lasted_file = load_lasted_file_username(self.user_identifier)
82
- if lasted_file is not None:
83
- with open(f"{lasted_file}.json", "r", encoding="utf-8") as f:
84
- json_s = json.load(f)
85
- self.history = json_s["history"]
86
- chatbot = json_s["chatbot"]
87
- return chatbot
88
 
89
- def audio_response(self, audio):
90
- media_file = open(audio, 'rb')
91
- response = openai.Audio.transcribe(
92
- api_key=API_KEY,
93
- model=MODEL_ID,
94
- file=media_file
95
- )
96
- return response["text"]
97
 
98
- def inference(self, inputs, chatbot, streaming=False, use_websearch=False, custom_websearch=False, **kwargs):
99
- if use_websearch or custom_websearch:
100
- import requests
101
 
102
- from bs4 import BeautifulSoup
103
- from langchain.utilities.google_search import GoogleSearchAPIWrapper
104
- from chains.web_search import GoogleWebSearch
105
- from config import GOOGLE_API_KEY, GOOGLE_CSE_ID, CUSTOM_API_KEY, CUSTOM_CSE_ID
106
- from chains.summary import WebSummary
107
- from chains.multi_queries import MultiQueries
108
-
109
- status_text = "Retrieving information from the web"
110
- yield chatbot, status_text
111
- if use_websearch:
112
- google_api_key = GOOGLE_API_KEY
113
- google_cse_id = GOOGLE_CSE_ID
114
- else:
115
- google_api_key = CUSTOM_API_KEY
116
- google_cse_id = CUSTOM_CSE_ID
117
- search = GoogleSearchAPIWrapper(google_api_key=google_api_key, google_cse_id=google_cse_id)
118
-
119
- queries_chain = MultiQueries()
120
- out = queries_chain.predict(question=inputs)
121
- queries = list(map(lambda x: x.split(': ')[-1], out.split('\n\n')))
122
- print(queries)
123
- results = []
124
- for query in queries:
125
- search_rs = search.results(query, 2)
126
- results.extend(search_rs)
127
- reference_results = []
128
- display_append = []
129
- for idx, result in enumerate(results[:3]):
130
- try:
131
- head = requests.head(result['link'])
132
- print(result["link"])
133
- status_text = "Access " + result['link']
134
- yield chatbot, status_text
135
- if "text/html" in head.headers['Content-Type']:
136
- html_response = requests.get(result['link'])
137
- soup = BeautifulSoup(html_response.content, "html.parser")
138
- try:
139
- web_summary = WebSummary()
140
- text = soup.get_text()
141
- lines = (line.strip() for line in text.splitlines())
142
- # break multi-headlines into a line each
143
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
144
- # drop blank lines
145
- text = '\n'.join(chunk for chunk in chunks if chunk)
146
-
147
- summary = web_summary.predict(question=inputs, doc=text)
148
- print("Can access", result['link'])
149
- # break into lines and remove leading and trailing space on each
150
-
151
- except:
152
- print("Cannot access ", result['link'])
153
- yield chatbot, status_text
154
- reference_results.append([summary, result['link']])
155
- display_append.append(
156
- f"<a href=\"{result['link']}\" target=\"_blank\">{idx+1}.&nbsp;{result['title']}</a>"
157
- )
158
- except:
159
- continue
160
 
161
- reference_results = add_source_numbers(reference_results)
162
- display_append = '<div class = "source-a">' + "\n".join(display_append) + '</div>'
 
 
 
 
 
 
163
 
164
- status_text = "Request URL: " + OPENAI_API_BASE
165
- yield chatbot, status_text
166
- chatbot.append((inputs, ""))
167
- web_search = GoogleWebSearch()
168
- ai_response = web_search.predict(context="\n\n".join(reference_results), question=inputs, chat_history=self.history)
169
-
170
- chatbot[-1] = (chatbot[-1][0], ai_response+display_append)
171
- self.memory(inputs, ai_response)
172
- self.auto_save_history(chatbot)
173
- yield chatbot, status_text
174
-
175
- else:
176
  status_text = "Indexing files to vector database"
177
  yield chatbot, status_text
 
178
 
179
- vectorstore = upload_file()
180
-
181
- status_text = "OpenAI version: " + OPENAI_API_VERSION
182
- yield chatbot, status_text
183
  qa_prompt, condense_prompt = self.format_prompt(**kwargs)
184
  job_done = object() # signals the processing is done
185
  q = SimpleQueue()
186
  if streaming:
187
  timeout = TIMEOUT_STREAM
188
- streaming_callback =[StreamingGradioCallbackHandler(q)]
189
 
190
  # Define llm model
191
- llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
192
  openai_api_type=OPENAI_API_TYPE,
193
  openai_api_base=OPENAI_API_BASE,
194
  openai_api_version=OPENAI_API_VERSION,
195
  openai_api_key=OPENAI_API_KEY,
196
  temperature=self.temperature,
197
- model_kwargs={"top_p": self.top_p},
198
- streaming=streaming,\
199
- callbacks=streaming_callback,
200
  request_timeout=timeout)
201
-
202
- condense_llm = CustomAzureOpenAI(deployment_name=self.condense_model_name,
203
- openai_api_type=OPENAI_API_TYPE,
204
- openai_api_base=OPENAI_API_BASE,
205
- openai_api_version=OPENAI_API_VERSION,
206
- openai_api_key=OPENAI_API_KEY,
207
- temperature=self.temperature)
208
 
209
  status_text = "Request URL: " + OPENAI_API_BASE
210
  yield chatbot, status_text
211
- # Create a funciton to call - this will run in a thread
212
  # Create a Queue object
213
  response_queue = SimpleQueue()
214
 
215
  def task():
216
- # Converation + RetrivalChain
217
- qa = CustomConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(k=5),
218
- condense_question_llm = condense_llm, verbose=True,
219
- condense_question_prompt=condense_prompt,
220
- combine_docs_chain_kwargs={"prompt": qa_prompt},
221
- return_source_documents=True)
 
222
  # query with input and chat history
223
  response = qa({"question": inputs, "chat_history": self.history})
224
  response_queue.put(response)
225
  q.put(job_done)
226
-
227
-
228
  thread = Thread(target=task)
229
  thread.start()
230
  chatbot.append((inputs, ""))
@@ -243,16 +165,114 @@ class OpenAIModel:
243
  # add citation info to response
244
  response = response_queue.get()
245
  relevant_docs = response["source_documents"]
246
- reference_results = [d.page_content for d in relevant_docs]
247
- display_append = add_details(reference_results)
248
- display_append = "\n\n" + "<details><summary><b>Citation</b></summary>"+ "".join(display_append) + "</details>"
249
- chatbot[-1] = (chatbot[-1][0], content+display_append)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  yield chatbot, status_text
251
 
252
  self.memory(inputs, content)
253
- self.auto_save_history(chatbot)
254
  thread.join()
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  if __name__ == '__main__':
257
  import os
258
  from config import OPENAI_API_KEY
@@ -261,7 +281,8 @@ if __name__ == '__main__':
261
  ChatPromptTemplate,
262
  SystemMessagePromptTemplate,
263
  HumanMessagePromptTemplate)
264
- SYSTEM_PROMPT_TEMPLATE = "You're a helpfull assistant."
 
265
  HUMAN_PROMPT_TEMPLATE = "Human: {question}\n AI answer:"
266
  prompt = ChatPromptTemplate.from_messages(
267
  [
@@ -269,13 +290,12 @@ if __name__ == '__main__':
269
  HumanMessagePromptTemplate.from_template(HUMAN_PROMPT_TEMPLATE)
270
  ]
271
  )
272
- print("-===============")
273
- llm = CustomAzureOpenAI(deployment_name="binh-gpt",
274
  openai_api_key=OPENAI_API_KEY,
275
  openai_api_base=OPENAI_API_BASE,
276
  openai_api_version=OPENAI_API_VERSION,
277
  temperature=0,
278
- model_kwargs={"top_p": 1.0},)
279
  llm_chain = LLMChain(
280
  llm=llm,
281
  prompt=prompt
 
1
  import json
2
  import os
3
+ import re
4
+
5
  import openai
6
 
7
  from langchain.prompts import PromptTemplate
8
+ from config import TIMEOUT_STREAM, HISTORY_DIR
9
  from vector_db import upload_file
10
  from callback import StreamingGradioCallbackHandler
11
  from queue import SimpleQueue, Empty, Queue
12
  from threading import Thread
13
+ from utils import add_source_numbers, add_details, web_citation, get_history_names
14
  from chains.custom_chain import CustomConversationalRetrievalChain
15
  from langchain.chains import LLMChain
16
  from chains.azure_openai import CustomAzureOpenAI
17
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, API_KEY, \
18
+ DEPLOYMENT_ID, MODEL_ID
19
+ from cosmos_db import upsert_item, read_item, delete_items, query_items
20
 
21
  class OpenAIModel:
22
  def __init__(
23
+ self,
24
+ llm_model_name,
25
+ condense_model_name,
26
+ prompt_template="",
27
+ temperature=0.0,
28
+ top_p=1.0,
29
+ n_choices=1,
30
+ stop=None,
31
+ presence_penalty=0,
32
+ frequency_penalty=0,
33
+ user=None
34
  ):
35
  self.llm_model_name = llm_model_name
36
  self.condense_model_name = condense_model_name
 
45
  self.history = []
46
  self.user_identifier = user
47
 
48
+ def set_user_identifier(self, new_user_identifier):
49
+ self.user_identifier = new_user_identifier
50
 
51
  def format_prompt(self, qa_prompt_template, condense_prompt_template):
52
  # Prompt template langchain
53
  qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=["question", "chat_history", "context"])
54
+ condense_prompt = PromptTemplate(template=condense_prompt_template,
55
+ input_variables=["question", "chat_history"])
56
  return qa_prompt, condense_prompt
57
 
58
  def memory(self, inputs, outputs, last_k=3):
 
68
  def delete_first_conversation(self):
69
  if self.history:
70
  self.history.pop(0)
71
+
72
  def delete_last_conversation(self):
73
  if len(self.history) > 0:
74
  self.history.pop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
+ def save_history(self, chatbot, file_name):
77
+ message = upsert_item(self.user_identifier, file_name, self.history, chatbot)
78
+ return message
 
 
 
 
 
79
 
80
+ def load_history(self, file_name):
81
+ items = read_item(self.user_identifier, file_name)
82
+ return items['id'], items['chatbot']
83
 
84
+ def delete_history(self, file_name):
85
+ message = delete_items(self.user_identifier, file_name)
86
+ return message, get_history_names(False, self.user_identifier), []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ def audio_response(self, audio):
89
+ media_file = open(audio, 'rb')
90
+ response = openai.Audio.transcribe(
91
+ api_key=API_KEY,
92
+ model=MODEL_ID,
93
+ file=media_file
94
+ )
95
+ return response["text"], None
96
 
97
+ def inference(self, inputs, chatbot, streaming=False, upload_files_btn=False, custom_websearch=False,
98
+ local_db=False,
99
+ **kwargs):
100
+ if upload_files_btn or local_db:
 
 
 
 
 
 
 
 
101
  status_text = "Indexing files to vector database"
102
  yield chatbot, status_text
103
+ vectorstore = upload_file(upload_files_btn)
104
 
 
 
 
 
105
  qa_prompt, condense_prompt = self.format_prompt(**kwargs)
106
  job_done = object() # signals the processing is done
107
  q = SimpleQueue()
108
  if streaming:
109
  timeout = TIMEOUT_STREAM
110
+ streaming_callback = [StreamingGradioCallbackHandler(q)]
111
 
112
  # Define llm model
113
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
114
  openai_api_type=OPENAI_API_TYPE,
115
  openai_api_base=OPENAI_API_BASE,
116
  openai_api_version=OPENAI_API_VERSION,
117
  openai_api_key=OPENAI_API_KEY,
118
  temperature=self.temperature,
119
+ model_kwargs={"top_p": self.top_p},
120
+ streaming=streaming, \
121
+ callbacks=streaming_callback,
122
  request_timeout=timeout)
123
+
124
+ condense_llm = CustomAzureOpenAI(deployment_name=self.condense_model_name,
125
+ openai_api_type=OPENAI_API_TYPE,
126
+ openai_api_base=OPENAI_API_BASE,
127
+ openai_api_version=OPENAI_API_VERSION,
128
+ openai_api_key=OPENAI_API_KEY,
129
+ temperature=self.temperature)
130
 
131
  status_text = "Request URL: " + OPENAI_API_BASE
132
  yield chatbot, status_text
133
+ # Create a function to call - this will run in a thread
134
  # Create a Queue object
135
  response_queue = SimpleQueue()
136
 
137
  def task():
138
+ # Conversation + RetrivalChain
139
+ qa = CustomConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(
140
+ search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.75}),
141
+ condense_question_llm=condense_llm, verbose=True,
142
+ condense_question_prompt=condense_prompt,
143
+ combine_docs_chain_kwargs={"prompt": qa_prompt},
144
+ return_source_documents=True)
145
  # query with input and chat history
146
  response = qa({"question": inputs, "chat_history": self.history})
147
  response_queue.put(response)
148
  q.put(job_done)
149
+
 
150
  thread = Thread(target=task)
151
  thread.start()
152
  chatbot.append((inputs, ""))
 
165
  # add citation info to response
166
  response = response_queue.get()
167
  relevant_docs = response["source_documents"]
168
+ if len(relevant_docs) == 0:
169
+ display_append = ""
170
+ else:
171
+ if upload_files_btn:
172
+ reference_results = [d.page_content for d in relevant_docs]
173
+ reference_sources = [d.metadata["source"] for d in relevant_docs]
174
+ display_append = add_details(reference_results, reference_sources)
175
+ display_append = '<div class = "source-a">' + "\n".join(display_append) + '</div>'
176
+ else:
177
+ display_append = []
178
+ for idx, d in enumerate(relevant_docs):
179
+ link = d.metadata["source"]
180
+ title = d.page_content.split("\n")[0]
181
+ # Remove non word characters and blank space before title
182
+ title = re.sub(r"[^\w\s]", "", title[:4]).strip()
183
+ display_append.append(
184
+ f'<a href=\"{link}\" target=\"_blank\">[{idx + 1}] {title}</a>'
185
+ )
186
+ display_append = '<div class = "source-a">' + "\n".join(display_append) + '</div>'
187
+ chatbot[-1] = (chatbot[-1][0], content + display_append)
188
  yield chatbot, status_text
189
 
190
  self.memory(inputs, content)
191
+ # self.auto_save_history(chatbot)
192
  thread.join()
193
 
194
+ else:
195
+ import requests
196
+
197
+ from langchain.utilities.google_search import GoogleSearchAPIWrapper
198
+ from chains.web_search import GoogleWebSearch
199
+ from config import GOOGLE_API_KEY, GOOGLE_CSE_ID
200
+ top_k = 4
201
+
202
+ if custom_websearch:
203
+ status_text = "Retrieving information from website FPTSoftware.com"
204
+ yield chatbot, status_text
205
+ params = {
206
+ "q": inputs,
207
+ "v": "\{539C9DC1-663A-418D-82A4-662D34EE34BC\}",
208
+ "p": 10,
209
+ "l": "en",
210
+ "s": "{EACE8DB5-668F-4357-9782-405070D28D11}",
211
+ "itemid": "\{91F4101E-B1F3-4905-A832-96F703D3FBB1\}",
212
+ }
213
+ req = requests.get(
214
+ "https://fptsoftware.com//sxa/search/results/?",
215
+ params=params
216
+ )
217
+ res = json.loads(req.text)
218
+ results = []
219
+ for r in res["Results"][:top_k]:
220
+ link = "https://fptsoftware.com" + r["Url"]
221
+ results.append({"link": link})
222
+ reference_results, display_append = web_citation(inputs, results, True)
223
+
224
+ reference_results = add_source_numbers(reference_results)
225
+ display_append = '<div class = "source-a">' + "\n".join(display_append) + '</div>'
226
+ status_text = "Request URL: " + OPENAI_API_BASE
227
+ yield chatbot, status_text
228
+ chatbot.append((inputs, ""))
229
+ web_search = GoogleWebSearch()
230
+ ai_response = web_search.predict(context="\n\n".join(reference_results), question=inputs,
231
+ chat_history=self.history)
232
+
233
+ chatbot[-1] = (chatbot[-1][0], ai_response + display_append)
234
+ self.memory(inputs, ai_response)
235
+ # self.auto_save_history(chatbot)
236
+ yield chatbot, status_text
237
+
238
+
239
+ else:
240
+ from chains.decision_maker import DecisionMaker
241
+ from chains.simple_chain import SimpleChain
242
+ decision_maker = DecisionMaker()
243
+ simple_chain = SimpleChain()
244
+ decision = decision_maker.predict(question=inputs)
245
+ if "LLM Model" in decision:
246
+ status_text = "Request URL: " + OPENAI_API_BASE
247
+ yield chatbot, status_text
248
+ chatbot.append((inputs, ""))
249
+ ai_response = simple_chain.predict(question=inputs)
250
+ chatbot[-1] = (chatbot[-1][0], ai_response)
251
+ self.memory(inputs, ai_response)
252
+ # self.auto_save_history(chatbot)
253
+ yield chatbot, status_text
254
+ else:
255
+ status_text = "Retrieving information from Google"
256
+ yield chatbot, status_text
257
+ search = GoogleSearchAPIWrapper(google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID)
258
+ results = search.results(inputs, num_results=top_k)
259
+ reference_results, display_append = web_citation(inputs, results, False)
260
+
261
+ reference_results = add_source_numbers(reference_results)
262
+ display_append = '<div class = "source-a">' + "\n".join(display_append) + '</div>'
263
+ status_text = "Request URL: " + OPENAI_API_BASE
264
+ yield chatbot, status_text
265
+ chatbot.append((inputs, ""))
266
+ web_search = GoogleWebSearch()
267
+ ai_response = web_search.predict(context="\n\n".join(reference_results), question=inputs,
268
+ chat_history=self.history)
269
+
270
+ chatbot[-1] = (chatbot[-1][0], ai_response + display_append)
271
+ self.memory(inputs, ai_response)
272
+ # self.auto_save_history(chatbot)
273
+ yield chatbot, status_text
274
+
275
+
276
  if __name__ == '__main__':
277
  import os
278
  from config import OPENAI_API_KEY
 
281
  ChatPromptTemplate,
282
  SystemMessagePromptTemplate,
283
  HumanMessagePromptTemplate)
284
+
285
+ SYSTEM_PROMPT_TEMPLATE = "You're a helpful assistant."
286
  HUMAN_PROMPT_TEMPLATE = "Human: {question}\n AI answer:"
287
  prompt = ChatPromptTemplate.from_messages(
288
  [
 
290
  HumanMessagePromptTemplate.from_template(HUMAN_PROMPT_TEMPLATE)
291
  ]
292
  )
293
+ llm = CustomAzureOpenAI(deployment_name="binh-gpt",
 
294
  openai_api_key=OPENAI_API_KEY,
295
  openai_api_base=OPENAI_API_BASE,
296
  openai_api_version=OPENAI_API_VERSION,
297
  temperature=0,
298
+ model_kwargs={"top_p": 1.0}, )
299
  llm_chain = LLMChain(
300
  llm=llm,
301
  prompt=prompt
chains/qaibot_chain.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ from chains.azure_openai import CustomAzureOpenAI
5
+ from chains.decision_maker import DecisionMaker
6
+ from chains.simple_chain import SimpleChain
7
+ from bs4 import BeautifulSoup
8
+ from chains.summary import WebSummary
9
+ from langchain.utilities.google_search import GoogleSearchAPIWrapper
10
+ from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID, GOOGLE_API_KEY, GOOGLE_CSE_ID
11
+
12
+ class QAIBotChain:
13
+ def __init__(self):
14
+ self.llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
15
+ openai_api_type=OPENAI_API_TYPE,
16
+ openai_api_base=OPENAI_API_BASE,
17
+ openai_api_version=OPENAI_API_VERSION,
18
+ openai_api_key=OPENAI_API_KEY,
19
+ temperature=0.0)
20
+ self.decision = DecisionMaker()
21
+ self.simple_chain = SimpleChain()
22
+ self.summary = WebSummary()
23
+
24
+ def run(self, question, custom_web_search=False, num_results=4):
25
+ if custom_web_search:
26
+ params = {
27
+ "q": question,
28
+ "v": "\{539C9DC1-663A-418D-82A4-662D34EE34BC\}",
29
+ "p": 10,
30
+ "l": "en",
31
+ "s": "{EACE8DB5-668F-4357-9782-405070D28D11}",
32
+ "itemid": "\{91F4101E-B1F3-4905-A832-96F703D3FBB1\}",
33
+ }
34
+ req = requests.get(
35
+ "https://fptsoftware.com//sxa/search/results/?",
36
+ params=params
37
+ )
38
+ res = json.loads(req.text)
39
+ results = []
40
+ for r in res["Results"][:num_results]:
41
+ link = "https://fptsoftware.com" + r["Url"]
42
+ results.append({"link": link})
43
+ else:
44
+ decision = self.decision.predict(question=question)
45
+ if "LLM Model" in decision:
46
+ ai_response = self.simple_chain.predict(question=question)
47
+ return ai_response, False
48
+ else:
49
+ search = GoogleSearchAPIWrapper(google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID)
50
+ results = search.results(question, num_results=num_results)
51
+ reference_results = []
52
+ display_append = []
53
+ for idx, result in enumerate(results):
54
+ try:
55
+ head = requests.head(result['link'])
56
+ if "text/html" in head.headers['Content-Type']:
57
+ html_response = requests.get(result['link'])
58
+ soup = BeautifulSoup(html_response.content, "html.parser")
59
+ if custom_web_search:
60
+ title = result["title"]
61
+ else:
62
+ title = soup.find_all('title')[0].get_text()
63
+ try:
64
+ text = soup.get_text()
65
+ lines = (line.strip() for line in text.splitlines())
66
+ # break multi-headlines into a line each
67
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
68
+ # drop blank lines
69
+ text = '\n'.join(chunk for chunk in chunks if chunk)
70
+
71
+ summary = self.web_summary.predict(question=question, doc=text)
72
+ print("Can access", result['link'])
73
+ except:
74
+ print("Cannot access ", result['link'])
75
+ reference_results.append([summary, result['link']])
76
+ display_append.append(
77
+ f'<a href=\"{result["link"]}\" target=\"_blank\">{idx + 1}.&nbsp;{title}</a>'
78
+ )
79
+ except:
80
+ continue
81
+ return reference_results, display_append
chains/related_question.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import LLMChain
2
+ from langchain.prompts.chat import (
3
+ ChatPromptTemplate,
4
+ SystemMessagePromptTemplate,
5
+ HumanMessagePromptTemplate)
6
+ from chains.azure_openai import CustomAzureOpenAI
7
+ from prompts.related_question import system_template, human_template
8
+ from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID, API_KEY
9
+ from langchain.chat_models import ChatOpenAI
10
+
11
+ class RelatedQuestion(LLMChain):
12
+ prompt = ChatPromptTemplate.from_messages(
13
+ [SystemMessagePromptTemplate.from_template(
14
+ system_template),
15
+ HumanMessagePromptTemplate.from_template(human_template)
16
+ ])
17
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
18
+ openai_api_type=OPENAI_API_TYPE,
19
+ openai_api_base=OPENAI_API_BASE,
20
+ openai_api_version=OPENAI_API_VERSION,
21
+ openai_api_key=OPENAI_API_KEY,
22
+ temperature=0.0)
23
+
24
+ if __name__ == "__main__":
25
+ rel = RelatedQuestion()
26
+ inputs = "Hello bot"
27
+ outputs = "Hello! How can I assist you today?"
28
+ import re
29
+ pattern = "\d. {*.?}"
30
+
31
+ res = rel.predict(inputs=inputs, outputs=outputs)
32
+ print(res)
33
+ out = list(map(lambda x: x.split('. ')[-1], res.split('\n')))
34
+ results = [[a] for a in out]
35
+ print(results)
chains/simple_chain.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.llm import LLMChain
2
+ from langchain.prompts.chat import (
3
+ ChatPromptTemplate,
4
+ SystemMessagePromptTemplate,
5
+ HumanMessagePromptTemplate)
6
+ from prompts.simple_chain import SYSTEM_PROMPT_TEMPLATE, HUMAN_PROMPT_TEMPLATE
7
+ from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID
8
+ from chains.azure_openai import CustomAzureOpenAI
9
+
10
+
11
+ class SimpleChain(LLMChain):
12
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
13
+ openai_api_type=OPENAI_API_TYPE,
14
+ openai_api_base=OPENAI_API_BASE,
15
+ openai_api_version=OPENAI_API_VERSION,
16
+ openai_api_key=OPENAI_API_KEY,
17
+ temperature=0.0)
18
+ prompt = ChatPromptTemplate.from_messages(
19
+ [
20
+ SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT_TEMPLATE),
21
+ HumanMessagePromptTemplate.from_template(HUMAN_PROMPT_TEMPLATE)
22
+ ])
chains/summary.py CHANGED
@@ -7,6 +7,8 @@ from chains.azure_openai import CustomAzureOpenAI
7
  from prompts.summary import system_template, human_template
8
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID, API_KEY
9
  from langchain.chat_models import ChatOpenAI
 
 
10
 
11
  class WebSummary(LLMChain):
12
  prompt = ChatPromptTemplate.from_messages(
@@ -14,12 +16,28 @@ class WebSummary(LLMChain):
14
  system_template),
15
  HumanMessagePromptTemplate.from_template(human_template)
16
  ])
17
- # llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
18
- # openai_api_type=OPENAI_API_TYPE,
19
- # openai_api_base=OPENAI_API_BASE,
20
- # openai_api_version=OPENAI_API_VERSION,
21
- # openai_api_key=OPENAI_API_KEY,
22
- # temperature=0.0)
23
- llm = ChatOpenAI(model_name="gpt-4",
24
- openai_api_key=API_KEY,
25
- temperature=0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from prompts.summary import system_template, human_template
8
  from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID, API_KEY
9
  from langchain.chat_models import ChatOpenAI
10
+ import json
11
+ import requests
12
 
13
  class WebSummary(LLMChain):
14
  prompt = ChatPromptTemplate.from_messages(
 
16
  system_template),
17
  HumanMessagePromptTemplate.from_template(human_template)
18
  ])
19
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
20
+ openai_api_type=OPENAI_API_TYPE,
21
+ openai_api_base=OPENAI_API_BASE,
22
+ openai_api_version=OPENAI_API_VERSION,
23
+ openai_api_key=OPENAI_API_KEY,
24
+ temperature=0.0)
25
+ def run(self, question, num_result=4):
26
+ params = {
27
+ "q": question,
28
+ "v": "\{539C9DC1-663A-418D-82A4-662D34EE34BC\}",
29
+ "p": 10,
30
+ "l": "en",
31
+ "s": "{EACE8DB5-668F-4357-9782-405070D28D11}",
32
+ "itemid": "\{91F4101E-B1F3-4905-A832-96F703D3FBB1\}",
33
+ }
34
+ req = requests.get(
35
+ "https://fptsoftware.com//sxa/search/results/?",
36
+ params=params
37
+ )
38
+ res = json.loads(req.text)
39
+ results = []
40
+ for r in res["Results"][:num_result]:
41
+ link = "https://fptsoftware.com" + r["Url"]
42
+ results.append({"link": link})
43
+ return results
chains/web_search.py CHANGED
@@ -9,14 +9,11 @@ from chains.azure_openai import CustomAzureOpenAI
9
  from langchain.chat_models import ChatOpenAI
10
 
11
  class GoogleWebSearch(LLMChain):
12
- # llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
13
- # openai_api_type=OPENAI_API_TYPE,
14
- # openai_api_base=OPENAI_API_BASE,
15
- # openai_api_version=OPENAI_API_VERSION,
16
- # openai_api_key=OPENAI_API_KEY,
17
- # temperature=0.0)
18
- llm = ChatOpenAI(model_name="gpt-4",
19
- openai_api_key=API_KEY,
20
  temperature=0.0)
21
  prompt = ChatPromptTemplate.from_messages(
22
  [
 
9
  from langchain.chat_models import ChatOpenAI
10
 
11
  class GoogleWebSearch(LLMChain):
12
+ llm = CustomAzureOpenAI(deployment_name=DEPLOYMENT_ID,
13
+ openai_api_type=OPENAI_API_TYPE,
14
+ openai_api_base=OPENAI_API_BASE,
15
+ openai_api_version=OPENAI_API_VERSION,
16
+ openai_api_key=OPENAI_API_KEY,
 
 
 
17
  temperature=0.0)
18
  prompt = ChatPromptTemplate.from_messages(
19
  [
config.py CHANGED
@@ -2,45 +2,59 @@ import os
2
 
3
  # Folder
4
  HISTORY_DIR = "history"
5
- SAVE_DIR = "documents"
6
 
7
  if not os.path.exists(HISTORY_DIR):
8
  os.makedirs(HISTORY_DIR)
9
-
10
- if not os.path.exists(SAVE_DIR):
11
- os.makedirs(SAVE_DIR)
12
-
13
  # Whisper API
14
- API_KEY = "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"
15
  MODEL_ID = "whisper-1"
16
 
17
  # Azure endpoint
18
  OPENAI_API_TYPE = "azure"
19
  OPENAI_API_VERSION = "2023-05-15"
20
  # Embedding openai
21
- EMBEDDING_API_KEY = "e672d672cf2d4c778e352e2f88271ebc"
22
  EMBEDDING_API_BASE = "https://qaigpt2.openai.azure.com/"
23
  EMBEDDING_DEPLOYMENT_ID = "embed"
24
  # ChatGPT
25
- OPENAI_API_KEY = "1941ec6a33bd405bac7f2f800b10d171"
26
- OPENAI_API_BASE = "https://qaigptjp.openai.azure.com/"
 
 
 
 
 
 
27
  DEPLOYMENT_ID = "gpt"
28
 
29
  # Pinecone vector DB
30
  PINECONE_API_KEY = "82b9902a-2908-4ece-88bf-483c413a91d7"
31
  PINECONE_ENVIRONMENT = "us-west1-gcp-free"
32
  INDEX_NAME = "text-indexing"
 
 
33
 
34
  # Google search API
35
- GOOGLE_API_KEY = "AIzaSyBcwB4YIqjDcYr5XnPt5IrktqbH4Mb_1hE"
36
- GOOGLE_CSE_ID = "e61c62a86e2b848fd"
37
 
38
  # Custom google search API
39
  CUSTOM_API_KEY = "AIzaSyDycFFOFtPg123bm9N3BRCy_q5gyEk7fzs"
40
- CUSTOM_CSE_ID = "3364dacc2a4144b22"
41
 
42
  # Local host
43
  TIMEOUT_STREAM = 60
44
  SEVER = "0.0.0.0"
45
  PORT = 7860
46
  DEBUG = True
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # Folder
4
  HISTORY_DIR = "history"
 
5
 
6
  if not os.path.exists(HISTORY_DIR):
7
  os.makedirs(HISTORY_DIR)
8
+
 
 
 
9
  # Whisper API
10
+ API_KEY = "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"
11
  MODEL_ID = "whisper-1"
12
 
13
  # Azure endpoint
14
  OPENAI_API_TYPE = "azure"
15
  OPENAI_API_VERSION = "2023-05-15"
16
  # Embedding openai
17
+ EMBEDDING_API_KEY = "776f46ee3ba445ebb2ecaeb988bfd04a"
18
  EMBEDDING_API_BASE = "https://qaigpt2.openai.azure.com/"
19
  EMBEDDING_DEPLOYMENT_ID = "embed"
20
  # ChatGPT
21
+ # EU Region
22
+ # OPENAI_API_KEY = "86f152e6a6ff46b1a71d5324a2823478"
23
+ # OPENAI_API_BASE ="https://qaigpteus2.openai.azure.com/"
24
+ # DEPLOYMENT_ID = "gpt"
25
+
26
+ # France Region
27
+ OPENAI_API_KEY = "94e749c64288478d8861fcaf3c1b415f"
28
+ OPENAI_API_BASE ="https://qaigptfr.openai.azure.com/"
29
  DEPLOYMENT_ID = "gpt"
30
 
31
  # Pinecone vector DB
32
  PINECONE_API_KEY = "82b9902a-2908-4ece-88bf-483c413a91d7"
33
  PINECONE_ENVIRONMENT = "us-west1-gcp-free"
34
  INDEX_NAME = "text-indexing"
35
+ NAME_SPACE_1 = "documents"
36
+ NAME_SPACE_2 = "fanpage"
37
 
38
  # Google search API
39
+ GOOGLE_API_KEY="AIzaSyBcwB4YIqjDcYr5XnPt5IrktqbH4Mb_1hE"
40
+ GOOGLE_CSE_ID="e61c62a86e2b848fd"
41
 
42
  # Custom google search API
43
  CUSTOM_API_KEY = "AIzaSyDycFFOFtPg123bm9N3BRCy_q5gyEk7fzs"
44
+ CUSTOM_CSE_ID = "a1bdbedc30f2b4790"
45
 
46
  # Local host
47
  TIMEOUT_STREAM = 60
48
  SEVER = "0.0.0.0"
49
  PORT = 7860
50
  DEBUG = True
51
+
52
+ # Azure blob storage
53
+ CONNECTION_STRING = "DefaultEndpointsProtocol=https;AccountName=qaigpt;AccountKey=osgfH8+I/azlhNNn5Ps3jpYLgCfnXKuOZPQ4fkpwVX/tNISdyf8jhfq37lKxJSDIORgDPA7wPg5v+AStb47TRg==;EndpointSuffix=core.windows.net"
54
+ CONTAINER_NAME = "fptdocuments"
55
+
56
+ # Azure cosmos db
57
+ CREDENTIAL = "6t1RmAaF6onypDHHtAVrcnNOwPZpPusTiq9N5tHl8HpkDEPZn5y0TJbdCNBga1JuKBJaKlqnc09JACDblCPpbQ=="
58
+ ENDPOINT = 'https://qaigpt.documents.azure.com:443/'
59
+ DATABASE = "chat_gpt"
60
+ CONTAINER_COSMOS = "history"
cosmos_db.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from azure.cosmos import CosmosClient, PartitionKey
2
+ from config import ENDPOINT, CREDENTIAL, DATABASE, CONTAINER_COSMOS
3
+ from datetime import date, datetime
4
+ import json
5
+
6
+ _client = CosmosClient(
7
+ url=ENDPOINT,
8
+ credential=CREDENTIAL,
9
+ )
10
+
11
+ database = _client.create_database_if_not_exists(DATABASE)
12
+ _container = database.create_container_if_not_exists(
13
+ CONTAINER_COSMOS,
14
+ partition_key=PartitionKey("/user_id")
15
+ )
16
+
17
+ def json_serial(obj):
18
+ """JSON serializer for objects not serializable by default json code"""
19
+
20
+ if isinstance(obj, (datetime, date)):
21
+ return obj.isoformat()
22
+ raise TypeError ("Type %s not serializable" % type(obj))
23
+
24
+ def upsert_item(user_id, file_name, history, chatbot):
25
+ response = _container.upsert_item(
26
+ body={
27
+ "id": file_name,
28
+ "user_id": user_id,
29
+ "date": json.dumps(datetime.utcnow(), default=json_serial),
30
+ "history": history,
31
+ "chatbot": chatbot
32
+ }
33
+ )
34
+
35
+ message = f'Upsert {file_name} succesfully'
36
+ return message
37
+
38
+ def read_item(user_id, file_name):
39
+ response = _container.read_item(item=file_name, partition_key=user_id)
40
+ return response
41
+
42
+
43
+ def query_items(user_id, file_name):
44
+ response = list(_container.query_items(
45
+ query="SELECT * FROM r WHERE r.user_id=@user_id AND r.id=@id",
46
+ parameters=[
47
+ {"name": "@user_id", "value": user_id}, {"name": "@id", "value": file_name}
48
+ ],
49
+ enable_cross_partition_query=True
50
+ ))
51
+
52
+ return response
53
+
54
+ def query_item(user_id):
55
+ response = list(_container.query_items(
56
+ query="SELECT * FROM r WHERE r.user_id=@user_id",
57
+ parameters=[
58
+ {"name": "@user_id", "value": user_id}
59
+ ],
60
+ enable_cross_partition_query=True
61
+ ))
62
+ return response
63
+
64
+ def delete_items(user_id, file_name):
65
+ response = _container.delete_item(item=file_name, partition_key=user_id)
66
+ message = f'Delete {file_name} succesfully'
67
+ return message
68
+
69
+ if __name__ == '__main__':
70
+ mes = query_item("khanh")
71
+ docs = [m["id"] for m in mes]
72
+ print(docs)
73
+
custom.css ADDED
@@ -0,0 +1,1026 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --chatbot-color-light: #000000;
3
+ --chatbot-color-dark: #FFFFFF;
4
+ --chatbot-background-color-light: #F3F3F3;
5
+ --chatbot-background-color-dark: #121111;
6
+ --message-user-background-color-light: #2685b5;
7
+ --message-user-background-color-dark: #2685b5;
8
+ --message-bot-background-color-light: #F3F3F3;
9
+ --message-bot-background-color-dark: #2C2C2C;
10
+ --switch-checkbox-color-light: #e9e9ec;
11
+ --switch-checkbox-color-dark: #515151;
12
+ --switch-checkbox-marked-color: #2685b5;
13
+ --cib-shadow-card: 0px 0.3px 0.9px rgba(0, 0, 0, 0.12), 0px 1.6px 3.6px rgba(0, 0, 0, 0.16);
14
+ --message-font-size: 15px;
15
+ --background-gradient: linear-gradient(90deg, rgb(239, 242, 247) 0%, 7.60286%, rgb(237, 240, 249) 15.2057%, 20.7513%, rgb(235, 239, 248) 26.297%, 27.6386%, rgb(235, 239, 248) 28.9803%, 38.2826%, rgb(231, 237, 249) 47.585%, 48.1216%, rgb(230, 236, 250) 48.6583%, 53.1306%, rgb(228, 236, 249) 57.6029%, 61.5385%, rgb(227, 234, 250) 65.4741%, 68.7835%, rgb(222, 234, 250) 72.093%, 75.7603%, rgb(219, 230, 248) 79.4275%, 82.8265%, rgb(216, 229, 248) 86.2254%, 87.8354%, rgb(213, 228, 249) 89.4454%, 91.8605%, rgb(210, 226, 249) 94.2755%, 95.4383%, rgb(209, 225, 248) 96.6011%, 98.3005%, rgb(208, 224, 247) 100%);
16
+ --background-gradient-dark: #0b0f19;
17
+ }
18
+
19
+ gradio-app {
20
+ background: var(--background-gradient) !important;
21
+ }
22
+
23
+ .dark gradio-app {
24
+ background: var(--background-gradient-dark) !important;
25
+ }
26
+
27
+ #app_title {
28
+ font-weight: var(--prose-header-text-weight);
29
+ font-size: var(--text-xxl);
30
+ line-height: 1.3;
31
+ text-align: left;
32
+ margin-top: 6px;
33
+ white-space: nowrap;
34
+ }
35
+
36
+ #description {
37
+ text-align: center;
38
+ margin: 32px 0 4px 0;
39
+ }
40
+
41
+
42
+ div.form {
43
+ background: none !important;
44
+ border: none !important;
45
+ }
46
+
47
+
48
+ #advanced_warning {
49
+ display: flex;
50
+ flex-wrap: wrap;
51
+ flex-direction: column;
52
+ align-content: center;
53
+ }
54
+
55
+ /* Gradio footer info */
56
+ footer {
57
+ margin-top: 15px !important;
58
+ font-size: 85%;
59
+ display: inline-block;
60
+ text-align: center;
61
+ opacity: 0.60;
62
+ position: absolute;
63
+ max-height: 30px;
64
+ width: 100% !important;
65
+ }
66
+
67
+ footer[class^="svelte-"] {
68
+ display: none !important;
69
+ }
70
+
71
+ #footer {
72
+ text-align: center;
73
+ }
74
+
75
+ #footer div {
76
+ display: inline-block;
77
+ }
78
+
79
+ #footer .versions {
80
+ font-size: 85%;
81
+ opacity: 0.60;
82
+ }
83
+
84
+ #float_display {
85
+ position: absolute;
86
+ max-height: 30px;
87
+ }
88
+
89
+ #toast-update {
90
+ position: absolute;
91
+ display: flex;
92
+ top: -500px;
93
+ width: 100%;
94
+ justify-content: center;
95
+ z-index: var(--layer-top);
96
+ transition: top 0.3s ease-out;
97
+ }
98
+
99
+ #check-chuanhu-update {
100
+ position: absolute;
101
+ align-items: center;
102
+ display: flex;
103
+ flex-direction: column;
104
+ justify-content: center;
105
+ margin: var(--size-6) var(--size-4);
106
+ box-shadow: var(--shadow-drop-lg);
107
+ border: 1px solid var(--block-label-border-color);
108
+ border-radius: var(--container-radius);
109
+ background: var(--background-fill-primary);
110
+ padding: var(--size-4) var(--size-6);
111
+ min-width: 360px;
112
+ max-width: 480px;
113
+ overflow: hidden;
114
+ pointer-events: auto;
115
+ }
116
+
117
+ #version-info-title {
118
+ font-size: 1.2em;
119
+ font-weight: bold;
120
+ text-align: start;
121
+ width: 100%;
122
+ }
123
+
124
+ #release-note-wrap {
125
+ width: 100%;
126
+ max-width: 400px;
127
+ height: 120px;
128
+ border: solid 1px var(--border-color-primary);
129
+ overflow: auto;
130
+ padding: 0 8px;
131
+ }
132
+
133
+ #release-note-wrap.hideK {
134
+ display: none;
135
+ }
136
+
137
+ /*.chatrow {*/
138
+ /* gap: 0 !important;*/
139
+ /*}*/
140
+
141
+ .record-icon svelte-1thnwz {
142
+ display: none !important;
143
+ }
144
+
145
+ .btn, .audio-btn .sm.secondary, .mic-wrap, .audio-btn .sm.tertiary {
146
+ height: 48px !important;
147
+ margin: 0 !important;
148
+ padding: 0 5px 0 !important;
149
+ display: inline-block;
150
+ }
151
+
152
+ .audio-btn {
153
+ border: 0 !important;
154
+ position: relative;
155
+ background: none !important;
156
+ }
157
+
158
+ .mic-wrap {
159
+ position: absolute;
160
+ padding: 0 !important;
161
+ top: 0;
162
+ left: 0;
163
+ z-index: 1;
164
+ }
165
+
166
+ .audio-btn .sm.secondary, .audio-btn .sm.tertiary {
167
+ color: transparent;
168
+ border: none !important;
169
+ }
170
+
171
+ .audio-btn .record-icon {
172
+ display: none !important;
173
+ }
174
+
175
+ .dark .btn-send {
176
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 20 20"%3E%3Cpath fill="white" d="M2.724 2.053a.5.5 0 0 0-.707.576l1.498 5.618a.5.5 0 0 0 .4.364l6.855 1.142c.279.047.279.447 0 .494l-6.854 1.142a.5.5 0 0 0-.401.364l-1.498 5.618a.5.5 0 0 0 .707.576l15-7.5a.5.5 0 0 0 0-.894l-15-7.5Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
177
+ }
178
+
179
+ .dark .btn-clear {
180
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 16 16"%3E%3Cpath fill="white" fill-rule="evenodd" d="M15.963 7.23A8 8 0 0 1 .044 8.841a.75.75 0 0 1 1.492-.158a6.5 6.5 0 1 0 9.964-6.16V4.25a.75.75 0 0 1-1.5 0V0h4.25a.75.75 0 0 1 0 1.5h-1.586a8.001 8.001 0 0 1 3.299 5.73ZM7 2a1 1 0 1 0 0-2a1 1 0 0 0 0 2Zm-2.25.25a1 1 0 1 1-2 0a1 1 0 0 1 2 0ZM1.5 6a1 1 0 1 0 0-2a1 1 0 0 0 0 2Z" clip-rule="evenodd"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
181
+ }
182
+
183
+ .dark .btn-record, .dark .audio-btn .sm.secondary {
184
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 20 20"%3E%3Cpath fill="white" d="M4.5 10a.5.5 0 0 0-1 0a5.5 5.5 0 0 0 5 5.478V17.5a.5.5 0 0 0 1 0v-.706A5.48 5.48 0 0 1 9 14.5A4.5 4.5 0 0 1 4.5 10ZM12 5v4.6a5.514 5.514 0 0 0-2.79 3.393A3 3 0 0 1 6 10V5a3 3 0 0 1 6 0Zm5 9.5a2.5 2.5 0 1 1-5 0a2.5 2.5 0 0 1 5 0Zm2 0a4.5 4.5 0 1 1-9 0a4.5 4.5 0 0 1 9 0Zm-8 0a3.5 3.5 0 1 0 7 0a3.5 3.5 0 0 0-7 0Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
185
+ }
186
+
187
+ .dark .audio-btn .sm.tertiary {
188
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 24 24"%3E%3Cdefs%3E%3Cfilter id="svgSpinnersGooeyBalls10"%3E%3CfeGaussianBlur in="SourceGraphic" result="y" stdDeviation="1.5"%2F%3E%3CfeColorMatrix in="y" result="z" values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 18 -7"%2F%3E%3CfeBlend in="SourceGraphic" in2="z"%2F%3E%3C%2Ffilter%3E%3C%2Fdefs%3E%3Cg fill="white" filter="url(%23svgSpinnersGooeyBalls10)"%3E%3Ccircle cx="4" cy="12" r="3"%3E%3Canimate attributeName="cx" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="4%3B9%3B4"%2F%3E%3Canimate attributeName="r" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="3%3B8%3B3"%2F%3E%3C%2Fcircle%3E%3Ccircle cx="15" cy="12" r="8"%3E%3Canimate attributeName="cx" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="15%3B20%3B15"%2F%3E%3Canimate attributeName="r" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="8%3B3%3B8"%2F%3E%3C%2Fcircle%3E%3C%2Fg%3E%3C%2Fsvg%3E') center no-repeat !important;
189
+ }
190
+
191
+ .dark .btn-del {
192
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="24" height="24" viewBox="0 0 24 24"%3E%3Cpath fill="white" d="m18 9l-5-5v3q0 .825.588 1.413T15 9h3Zm0 10.425L16.6 20.8q-.275.275-.688.288T15.2 20.8q-.275-.275-.275-.7t.275-.7l1.4-1.4l-1.4-1.4q-.275-.275-.275-.7t.275-.7q.275-.275.7-.275t.7.275l1.4 1.4l1.4-1.4q.275-.275.688-.287t.712.287q.275.275.275.7t-.275.7L19.425 18l1.375 1.4q.275.275.288.688t-.288.712q-.275.275-.7.275t-.7-.275L18 19.425ZM6 22q-.825 0-1.413-.588T4 20V4q0-.825.588-1.413T6 2h7.175q.4 0 .763.15t.637.425l4.85 4.85q.275.275.425.638t.15.762v3.525q-.475-.175-.988-.263T17.976 12q-2.5 0-4.237 1.738T12 17.974q0 1.125.4 2.163T13.55 22H6Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
193
+ }
194
+
195
+ .dark .btn-del-all {
196
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="24" height="24" viewBox="0 0 24 24"%3E%3Cg fill="none" fill-rule="evenodd"%3E%3Cpath d="M24 0v24H0V0h24ZM12.593 23.258l-.011.002l-.071.035l-.02.004l-.014-.004l-.071-.035c-.01-.004-.019-.001-.024.005l-.004.01l-.017.428l.005.02l.01.013l.104.074l.015.004l.012-.004l.104-.074l.012-.016l.004-.017l-.017-.427c-.002-.01-.009-.017-.017-.018Zm.265-.113l-.013.002l-.185.093l-.01.01l-.003.011l.018.43l.005.012l.008.007l.201.093c.012.004.023 0 .029-.008l.004-.014l-.034-.614c-.003-.012-.01-.02-.02-.022Zm-.715.002a.023.023 0 0 0-.027.006l-.006.014l-.034.614c0 .012.007.02.017.024l.015-.002l.201-.093l.01-.008l.004-.011l.017-.43l-.003-.012l-.01-.01l-.184-.092Z"%2F%3E%3Cpath fill="white" d="M4 3a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V7.5a2 2 0 0 0-2-2h-7.52l-1.399-1.75A2 2 0 0 0 9.52 3H4Zm5.172 7.172a1 1 0 0 1 1.414 0L12 11.586l1.414-1.414a1 1 0 1 1 1.414 1.414L13.414 13l1.414 1.414a1 1 0 0 1-1.414 1.414L12 14.414l-1.414 1.414a1 1 0 1 1-1.414-1.414L10.586 13l-1.414-1.414a1 1 0 0 1 0-1.414Z"%2F%3E%3C%2Fg%3E%3C%2Fsvg%3E') center no-repeat !important;
197
+ }
198
+
199
+ .btn-send {
200
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 20 20"%3E%3Cpath fill="%232685b5" d="M2.724 2.053a.5.5 0 0 0-.707.576l1.498 5.618a.5.5 0 0 0 .4.364l6.855 1.142c.279.047.279.447 0 .494l-6.854 1.142a.5.5 0 0 0-.401.364l-1.498 5.618a.5.5 0 0 0 .707.576l15-7.5a.5.5 0 0 0 0-.894l-15-7.5Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
201
+ }
202
+
203
+ .btn-clear {
204
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 16 16"%3E%3Cpath fill="%232685b5" fill-rule="evenodd" d="M15.963 7.23A8 8 0 0 1 .044 8.841a.75.75 0 0 1 1.492-.158a6.5 6.5 0 1 0 9.964-6.16V4.25a.75.75 0 0 1-1.5 0V0h4.25a.75.75 0 0 1 0 1.5h-1.586a8.001 8.001 0 0 1 3.299 5.73ZM7 2a1 1 0 1 0 0-2a1 1 0 0 0 0 2Zm-2.25.25a1 1 0 1 1-2 0a1 1 0 0 1 2 0ZM1.5 6a1 1 0 1 0 0-2a1 1 0 0 0 0 2Z" clip-rule="evenodd"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
205
+ }
206
+
207
+ .btn-record, .audio-btn .sm.secondary {
208
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="20" height="20" viewBox="0 0 20 20"%3E%3Cpath fill="%232685b5" d="M4.5 10a.5.5 0 0 0-1 0a5.5 5.5 0 0 0 5 5.478V17.5a.5.5 0 0 0 1 0v-.706A5.48 5.48 0 0 1 9 14.5A4.5 4.5 0 0 1 4.5 10ZM12 5v4.6a5.514 5.514 0 0 0-2.79 3.393A3 3 0 0 1 6 10V5a3 3 0 0 1 6 0Zm5 9.5a2.5 2.5 0 1 1-5 0a2.5 2.5 0 0 1 5 0Zm2 0a4.5 4.5 0 1 1-9 0a4.5 4.5 0 0 1 9 0Zm-8 0a3.5 3.5 0 1 0 7 0a3.5 3.5 0 0 0-7 0Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
209
+ }
210
+
211
+ .audio-btn .sm.tertiary {
212
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="24" height="24" viewBox="0 0 24 24"%3E%3Cdefs%3E%3Cfilter id="svgSpinnersGooeyBalls10"%3E%3CfeGaussianBlur in="SourceGraphic" result="y" stdDeviation="1.5"%2F%3E%3CfeColorMatrix in="y" result="z" values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 18 -7"%2F%3E%3CfeBlend in="SourceGraphic" in2="z"%2F%3E%3C%2Ffilter%3E%3C%2Fdefs%3E%3Cg fill="%232685b5" filter="url(%23svgSpinnersGooeyBalls10)"%3E%3Ccircle cx="4" cy="12" r="3"%3E%3Canimate attributeName="cx" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="4%3B9%3B4"%2F%3E%3Canimate attributeName="r" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="3%3B8%3B3"%2F%3E%3C%2Fcircle%3E%3Ccircle cx="15" cy="12" r="8"%3E%3Canimate attributeName="cx" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="15%3B20%3B15"%2F%3E%3Canimate attributeName="r" calcMode="spline" dur="0.75s" keySplines=".56%2C.52%2C.17%2C.98%3B.56%2C.52%2C.17%2C.98" repeatCount="indefinite" values="8%3B3%3B8"%2F%3E%3C%2Fcircle%3E%3C%2Fg%3E%3C%2Fsvg%3E') center no-repeat !important;
213
+ }
214
+
215
+ .btn-del {
216
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="24" height="24" viewBox="0 0 24 24"%3E%3Cpath fill="%232685b5" d="m18 9l-5-5v3q0 .825.588 1.413T15 9h3Zm0 10.425L16.6 20.8q-.275.275-.688.288T15.2 20.8q-.275-.275-.275-.7t.275-.7l1.4-1.4l-1.4-1.4q-.275-.275-.275-.7t.275-.7q.275-.275.7-.275t.7.275l1.4 1.4l1.4-1.4q.275-.275.688-.287t.712.287q.275.275.275.7t-.275.7L19.425 18l1.375 1.4q.275.275.288.688t-.288.712q-.275.275-.7.275t-.7-.275L18 19.425ZM6 22q-.825 0-1.413-.588T4 20V4q0-.825.588-1.413T6 2h7.175q.4 0 .763.15t.637.425l4.85 4.85q.275.275.425.638t.15.762v3.525q-.475-.175-.988-.263T17.976 12q-2.5 0-4.237 1.738T12 17.974q0 1.125.4 2.163T13.55 22H6Z"%2F%3E%3C%2Fsvg%3E') center no-repeat !important;
217
+ }
218
+
219
+ .btn-del-all {
220
+ background: url('data:image/svg+xml,%3Csvg xmlns="http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg" width="24" height="24" viewBox="0 0 24 24"%3E%3Cg fill="none" fill-rule="evenodd"%3E%3Cpath d="M24 0v24H0V0h24ZM12.593 23.258l-.011.002l-.071.035l-.02.004l-.014-.004l-.071-.035c-.01-.004-.019-.001-.024.005l-.004.01l-.017.428l.005.02l.01.013l.104.074l.015.004l.012-.004l.104-.074l.012-.016l.004-.017l-.017-.427c-.002-.01-.009-.017-.017-.018Zm.265-.113l-.013.002l-.185.093l-.01.01l-.003.011l.018.43l.005.012l.008.007l.201.093c.012.004.023 0 .029-.008l.004-.014l-.034-.614c-.003-.012-.01-.02-.02-.022Zm-.715.002a.023.023 0 0 0-.027.006l-.006.014l-.034.614c0 .012.007.02.017.024l.015-.002l.201-.093l.01-.008l.004-.011l.017-.43l-.003-.012l-.01-.01l-.184-.092Z"%2F%3E%3Cpath fill="%232685b5" d="M4 3a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V7.5a2 2 0 0 0-2-2h-7.52l-1.399-1.75A2 2 0 0 0 9.52 3H4Zm5.172 7.172a1 1 0 0 1 1.414 0L12 11.586l1.414-1.414a1 1 0 1 1 1.414 1.414L13.414 13l1.414 1.414a1 1 0 0 1-1.414 1.414L12 14.414l-1.414 1.414a1 1 0 1 1-1.414-1.414L10.586 13l-1.414-1.414a1 1 0 0 1 0-1.414Z"%2F%3E%3C%2Fg%3E%3C%2Fsvg%3E') center no-repeat !important;
221
+ }
222
+
223
+
224
+ .tooltip-btn, .audio-btn .sm.secondary, .audio-btn .sm.tertiary {
225
+ position: relative;
226
+ display: inline-block;
227
+ padding: 10px 20px;
228
+ border: 1px solid #ddd;
229
+ background-color: #f9f9f9;
230
+ cursor: pointer;
231
+ }
232
+
233
+ .tooltip-btn::after,
234
+ .tooltip-btn::before,
235
+ .audio-btn .sm.secondary::after,
236
+ .audio-btn .sm.secondary::before,
237
+ .audio-btn .sm.tertiary::after,
238
+ .audio-btn .sm.tertiary::before {
239
+ content: "";
240
+ position: absolute;
241
+ visibility: hidden;
242
+ top: 100%;
243
+ left: 50%;
244
+ transform: translateX(-50%);
245
+ transition: opacity 0.3s;
246
+ pointer-events: none;
247
+ }
248
+
249
+ .tooltip-content-send::after {
250
+ content: "Send message";
251
+ }
252
+
253
+ .tooltip-content-record::after, .audio-btn .sm.secondary::after {
254
+ content: "Use microphone";
255
+ }
256
+
257
+ .audio-btn .sm.tertiary::after {
258
+ content: "Stop recording";
259
+ }
260
+
261
+ .tooltip-content-clear::after {
262
+ content: "New topic";
263
+ }
264
+
265
+ .tooltip-del:after {
266
+ content: "Remove selected files";
267
+ }
268
+
269
+ .tooltip-del-all::after {
270
+ content: "Remove all files";
271
+ }
272
+
273
+ .tooltip-btn::after, .audio-btn .sm.secondary::after, .audio-btn .sm.tertiary::after {
274
+ background-color: #000;
275
+ color: #fff;
276
+ text-align: center;
277
+ font-size: 12px;
278
+ font-weight: bold;
279
+ padding: 5px;
280
+ border-radius: 6px;
281
+ z-index: 1;
282
+ white-space: nowrap;
283
+ opacity: 0;
284
+ margin-top: 10px; /* Spacing between the button and the tooltip */
285
+ }
286
+
287
+ .tooltip-btn::before, .audio-btn .sm.secondary::before, .audio-btn .sm.tertiary::before {
288
+ border: 5px solid transparent;
289
+ border-bottom-color: #000; /* Arrow color */
290
+ }
291
+
292
+ .tooltip-btn:hover::after,
293
+ .tooltip-btn:hover::before,
294
+ .audio-btn .sm.secondary:hover::after,
295
+ .audio-btn .sm.secondary:hover::before,
296
+ .audio-btn .sm.tertiary:hover::after,
297
+ .audio-btn .sm.tertiary:hover::before {
298
+ visibility: visible;
299
+ opacity: 0.8; /* Arrow and tooltip opacity */
300
+ }
301
+
302
+
303
+ .btn-update-group {
304
+ display: flex;
305
+ justify-content: space-evenly;
306
+ align-items: center;
307
+ width: 100%;
308
+ padding-top: 10px;
309
+ }
310
+
311
+ .btn-update-group.hideK {
312
+ display: none;
313
+ }
314
+
315
+ /* user_info */
316
+ #user_info.block {
317
+ white-space: nowrap;
318
+ position: absolute;
319
+ left: 13em;
320
+ top: -1em;
321
+ z-index: var(--layer-2);
322
+ box-shadow: var(--block-shadow);
323
+ border: none !important;
324
+ border-radius: 10px 10px 10px 0;
325
+ background: var(--color-accent);
326
+ padding: var(--block-label-padding);
327
+ font-size: var(--block-label-text-size);
328
+ line-height: var(--line-sm);
329
+ width: auto;
330
+ max-height: 30px !important;
331
+ opacity: 1;
332
+ transition: opacity 0.3s ease-in-out;
333
+ }
334
+
335
+ #user_info.block .wrap {
336
+ opacity: 0;
337
+ }
338
+
339
+ #user_info p {
340
+ color: white;
341
+ font-weight: var(--block-label-text-weight);
342
+ }
343
+
344
+ #user_info.hideK {
345
+ opacity: 0;
346
+ transition: opacity 1s ease-in-out;
347
+ }
348
+
349
+ /* status_display */
350
+ #status_display {
351
+ margin-bottom: 10px;
352
+ display: flex;
353
+ min-height: 2em;
354
+ align-items: flex-end;
355
+ justify-content: flex-end;
356
+ }
357
+
358
+ #status_display p {
359
+ font-size: .85em;
360
+ font-family: ui-monospace, "SF Mono", "SFMono-Regular", "Menlo", "Consolas", "Liberation Mono", "Microsoft Yahei UI", "Microsoft Yahei", monospace;
361
+ color: var(--body-text-color-subdued);
362
+ }
363
+
364
+ #status_display {
365
+ transition: all 0.6s;
366
+ }
367
+
368
+ #chuanhu_chatbot {
369
+ transition: height 0.3s ease;
370
+ }
371
+
372
+ /* usage_display */
373
+ .insert_block {
374
+ position: relative;
375
+ margin: 0;
376
+ padding: 8px 12px;
377
+ box-shadow: var(--block-shadow);
378
+ border-width: var(--block-border-width);
379
+ border-color: var(--block-border-color);
380
+ border-radius: var(--block-radius);
381
+ background: var(--block-background-fill);
382
+ width: 100%;
383
+ line-height: var(--line-sm);
384
+ min-height: 2em;
385
+ }
386
+
387
+ #usage_display p, #usage_display span {
388
+ margin: 0;
389
+ font-size: .85em;
390
+ color: var(--body-text-color-subdued);
391
+ }
392
+
393
+ .progress-bar {
394
+ background-color: var(--input-background-fill);
395
+ margin: .5em 0 !important;
396
+ height: 20px;
397
+ border-radius: 10px;
398
+ overflow: hidden;
399
+ }
400
+
401
+ .progress {
402
+ background-color: var(--block-title-background-fill);
403
+ height: 100%;
404
+ border-radius: 10px;
405
+ text-align: right;
406
+ transition: width 0.5s ease-in-out;
407
+ }
408
+
409
+ .progress-text {
410
+ /* color: white; */
411
+ display: none !important;
412
+ /*color: var(--color-accent) !important;*/
413
+ /*font-size: 1em !important;*/
414
+ /*font-weight: bold;*/
415
+ /*padding-right: 10px;*/
416
+ /*line-height: 20px;*/
417
+ }
418
+
419
+ /* Light/dark mode toggle */
420
+ #apSwitch input[type="checkbox"] {
421
+ margin: 0 !important;
422
+ }
423
+
424
+ #apSwitch label.apSwitch {
425
+ display: flex;
426
+ align-items: center;
427
+ cursor: pointer;
428
+ color: var(--body-text-color);
429
+ font-weight: var(--checkbox-label-text-weight);
430
+ font-size: var(--checkbox-label-text-size);
431
+ line-height: var(--line-md);
432
+ margin: 2px 0 !important;
433
+ }
434
+
435
+ input[type="checkbox"]#apSwitch_checkbox::before {
436
+ background: none !important;
437
+ content: '🌞';
438
+ border: none !important;
439
+ box-shadow: none !important;
440
+ font-size: 22px;
441
+ top: -4.4px;
442
+ left: -1px;
443
+ }
444
+
445
+ input:checked[type="checkbox"]#apSwitch_checkbox::before {
446
+ content: '🌚';
447
+ left: 16px;
448
+ }
449
+
450
+ /* .apSwitch {
451
+ top: 2px;
452
+ display: inline-block;
453
+ height: 22px;
454
+ position: relative;
455
+ width: 40px;
456
+ border-radius: 11px;
457
+ box-shadow: inset 0 0 1px 0 rgba(0,0,0,0.05), inset 0 0 2px 0 rgba(0,0,0,0.08) !important;
458
+ }
459
+ .apSwitch input {
460
+ display: none !important;
461
+ }
462
+ .apSlider {
463
+ background-color: var(--neutral-200);
464
+ bottom: 0;
465
+ cursor: pointer;
466
+ left: 0;
467
+ position: absolute;
468
+ right: 0;
469
+ top: 0;
470
+ transition: .4s;
471
+ font-size: 22px;
472
+ border-radius: 11px;
473
+ }
474
+ .apSlider::before {
475
+ transform: scale(0.9);
476
+ position: absolute;
477
+ transition: .4s;
478
+ content: "🌞";
479
+ }
480
+ input:checked + .apSlider {
481
+ background-color: var(--primary-600);
482
+ }
483
+ input:checked + .apSlider::before {
484
+ transform: translateX(18px);
485
+ content:"🌚";
486
+ } */
487
+
488
+ .switch_checkbox label {
489
+ flex-direction: row-reverse;
490
+ justify-content: space-between;
491
+ }
492
+
493
+ .switch_checkbox input[type="checkbox"] + span {
494
+ margin-left: 0 !important;
495
+ }
496
+
497
+ .switch_checkbox input[type="checkbox"] {
498
+ -moz-appearance: none;
499
+ appearance: none;
500
+ -webkit-appearance: none;
501
+ outline: none;
502
+ }
503
+
504
+ .switch_checkbox input[type="checkbox"] {
505
+ display: inline-block !important;
506
+ position: relative !important;
507
+ border: none !important;
508
+ outline: none;
509
+ width: 40px !important;
510
+ height: 22px !important;
511
+ border-radius: 11px !important;
512
+ background-image: none !important;
513
+ box-shadow: inset 0 0 1px 0 rgba(0, 0, 0, 0.05), inset 0 0 2px 0 rgba(0, 0, 0, 0.08) !important;
514
+ background-color: var(--switch-checkbox-color-light) !important;
515
+ transition: .2s ease background-color;
516
+ }
517
+
518
+ .dark .switch_checkbox input[type="checkbox"] {
519
+ background-color: var(--switch-checkbox-color-light) !important;
520
+ }
521
+
522
+ .switch_checkbox input[type="checkbox"]::before {
523
+ content: "";
524
+ position: absolute;
525
+ width: 22px;
526
+ height: 22px;
527
+ top: 0;
528
+ left: 0;
529
+ background: #FFFFFF;
530
+ border: 0.5px solid rgba(0, 0, 0, 0.02);
531
+ box-shadow: 0 0 0 0 rgba(0, 0, 0, 0.15), 0 1px 0 0 rgba(0, 0, 0, 0.05);
532
+ transform: scale(0.9);
533
+ border-radius: 11px !important;
534
+ transition: .4s ease all;
535
+ box-shadow: var(--input-shadow);
536
+ }
537
+
538
+ .switch_checkbox input:checked[type="checkbox"] {
539
+ background-color: var(--switch-checkbox-marked-color) !important;
540
+ }
541
+
542
+ .switch_checkbox input:checked[type="checkbox"]::before {
543
+ background-color: #fff;
544
+ left: 18px;
545
+ }
546
+
547
+ /* Override Slider Styles (for webkit browsers like Safari and Chrome)
548
+ * Hoping this proposal lands soon: https://github.com/w3c/csswg-drafts/issues/4410
549
+ * Range-slider styling is still far too inconsistent across platforms
550
+ */
551
+
552
+ /* input[type="range"] {
553
+ -webkit-appearance: none;
554
+ height: 4px;
555
+ background: var(--input-background-fill);
556
+ border-radius: 5px;
557
+ background-image: linear-gradient(var(--primary-500),var(--primary-500));
558
+ background-size: 0% 100%;
559
+ background-repeat: no-repeat;
560
+ } */
561
+ input[type="range"] {
562
+ height: 4px;
563
+ border-radius: 5px;
564
+ }
565
+
566
+ input[type="range"]::-webkit-slider-thumb {
567
+ -webkit-appearance: none;
568
+ height: 20px;
569
+ width: 20px;
570
+ border-radius: 50%;
571
+ border: solid 0.5px #ddd;
572
+ background-color: white;
573
+ cursor: ew-resize;
574
+ box-shadow: var(--input-shadow);
575
+ transition: background-color .1s ease;
576
+ }
577
+
578
+ input[type="range"]::-webkit-slider-thumb:hover {
579
+ background: var(--neutral-50);
580
+ }
581
+
582
+ input[type=range]::-webkit-slider-runnable-track {
583
+ -webkit-appearance: none;
584
+ box-shadow: none;
585
+ border: none;
586
+ background: transparent;
587
+ }
588
+
589
+ hr.append-display {
590
+ margin: 8px 0;
591
+ border: none;
592
+ height: 1px;
593
+ border-top-width: 0;
594
+ background-image: linear-gradient(to right, rgba(50, 50, 50, 0.1), rgba(150, 150, 150, 0.8), rgba(50, 50, 50, 0.1));
595
+ }
596
+
597
+ .source-a {
598
+ font-size: 0.8em;
599
+ max-width: 100%;
600
+ margin: 0;
601
+ display: flex;
602
+ flex-direction: row;
603
+ flex-wrap: wrap;
604
+ align-items: center;
605
+ /* background-color: #dddddd88; */
606
+ border-radius: 1.5rem;
607
+ padding: 0.2em;
608
+ }
609
+
610
+ .source-a a, .source-a details {
611
+ display: inline-block;
612
+ background-color: #aaaaaa50;
613
+ border-radius: 1rem;
614
+ padding: 0.5em;
615
+ text-align: center;
616
+ text-overflow: ellipsis;
617
+ overflow: hidden;
618
+ min-width: 40%;
619
+ white-space: nowrap;
620
+ margin: 0.2rem 0.1rem;
621
+ text-decoration: none !important;
622
+ flex: 1;
623
+ transition: flex 0.5s;
624
+ }
625
+
626
+ .source-a details > p {
627
+ background-color: #aaaaaa50;
628
+ border-radius: 1rem;
629
+ padding: 0.5em;
630
+ text-overflow: ellipsis;
631
+ text-align: left !important;
632
+ overflow: hidden;
633
+ white-space: pre !important;
634
+ margin: 0.2rem 0.1rem;
635
+ text-decoration: none !important;
636
+ flex: 1;
637
+ transition: flex 0.5s;
638
+ }
639
+
640
+ .source-a a:hover, .source-a details:hover {
641
+ background-color: #aaaaaa20;
642
+ flex: 2;
643
+ }
644
+
645
+ #submit_btn, #cancel_btn {
646
+ height: 42px !important;
647
+ }
648
+
649
+ #submit_btn::before {
650
+ content: url("data:image/svg+xml, %3Csvg width='21px' height='20px' viewBox='0 0 21 20' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink'%3E %3Cg id='page' stroke='none' stroke-width='1' fill='none' fill-rule='evenodd'%3E %3Cg id='send' transform='translate(0.435849, 0.088463)' fill='%23FFFFFF' fill-rule='nonzero'%3E %3Cpath d='M0.579148261,0.0428666046 C0.301105539,-0.0961547561 -0.036517765,0.122307382 0.0032026237,0.420210298 L1.4927172,18.1553639 C1.5125774,18.4334066 1.79062012,18.5922882 2.04880264,18.4929872 L8.24518329,15.8913017 L11.6412765,19.7441794 C11.8597387,19.9825018 12.2370824,19.8832008 12.3165231,19.5852979 L13.9450591,13.4882182 L19.7839562,11.0255541 C20.0619989,10.8865327 20.0818591,10.4694687 19.7839562,10.3105871 L0.579148261,0.0428666046 Z M11.6138902,17.0883151 L9.85385903,14.7195502 L0.718169621,0.618812241 L12.69945,12.9346347 L11.6138902,17.0883151 Z' id='shape'%3E%3C/path%3E %3C/g%3E %3C/g%3E %3C/svg%3E");
651
+ height: 21px;
652
+ }
653
+
654
+ #cancel_btn::before {
655
+ content: url("data:image/svg+xml,%3Csvg width='21px' height='21px' viewBox='0 0 21 21' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink'%3E %3Cg id='pg' stroke='none' stroke-width='1' fill='none' fill-rule='evenodd'%3E %3Cpath d='M10.2072007,20.088463 C11.5727865,20.088463 12.8594566,19.8259823 14.067211,19.3010209 C15.2749653,18.7760595 16.3386126,18.0538087 17.2581528,17.1342685 C18.177693,16.2147282 18.8982283,15.1527965 19.4197586,13.9484733 C19.9412889,12.7441501 20.202054,11.4557644 20.202054,10.0833163 C20.202054,8.71773046 19.9395733,7.43106036 19.4146119,6.22330603 C18.8896505,5.01555169 18.1673997,3.95018885 17.2478595,3.0272175 C16.3283192,2.10424615 15.2646719,1.3837109 14.0569176,0.865611739 C12.8491633,0.34751258 11.5624932,0.088463 10.1969073,0.088463 C8.83132146,0.088463 7.54636692,0.34751258 6.34204371,0.865611739 C5.1377205,1.3837109 4.07407321,2.10424615 3.15110186,3.0272175 C2.22813051,3.95018885 1.5058797,5.01555169 0.984349419,6.22330603 C0.46281914,7.43106036 0.202054,8.71773046 0.202054,10.0833163 C0.202054,11.4557644 0.4645347,12.7441501 0.9894961,13.9484733 C1.5144575,15.1527965 2.23670831,16.2147282 3.15624854,17.1342685 C4.07578877,18.0538087 5.1377205,18.7760595 6.34204371,19.3010209 C7.54636692,19.8259823 8.83475258,20.088463 10.2072007,20.088463 Z M10.2072007,18.2562448 C9.07493099,18.2562448 8.01471483,18.0452309 7.0265522,17.6232031 C6.03838956,17.2011753 5.17031614,16.6161693 4.42233192,15.8681851 C3.6743477,15.1202009 3.09105726,14.2521274 2.67246059,13.2639648 C2.25386392,12.2758022 2.04456558,11.215586 2.04456558,10.0833163 C2.04456558,8.95104663 2.25386392,7.89083047 2.67246059,6.90266784 C3.09105726,5.9145052 3.6743477,5.04643178 4.42233192,4.29844756 C5.17031614,3.55046334 6.036674,2.9671729 7.02140552,2.54857623 C8.00613703,2.12997956 9.06463763,1.92068122 10.1969073,1.92068122 C11.329177,1.92068122 12.3911087,2.12997956 13.3827025,2.54857623 C14.3742962,2.9671729 
15.2440852,3.55046334 15.9920694,4.29844756 C16.7400537,5.04643178 17.3233441,5.9145052 17.7419408,6.90266784 C18.1605374,7.89083047 18.3698358,8.95104663 18.3698358,10.0833163 C18.3698358,11.215586 18.1605374,12.2758022 17.7419408,13.2639648 C17.3233441,14.2521274 16.7400537,15.1202009 15.9920694,15.8681851 C15.2440852,16.6161693 14.3760118,17.2011753 13.3878492,17.6232031 C12.3996865,18.0452309 11.3394704,18.2562448 10.2072007,18.2562448 Z M7.65444721,13.6242324 L12.7496608,13.6242324 C13.0584616,13.6242324 13.3003556,13.5384544 13.4753427,13.3668984 C13.6503299,13.1953424 13.7378234,12.9585951 13.7378234,12.6566565 L13.7378234,7.49968276 C13.7378234,7.19774418 13.6503299,6.96099688 13.4753427,6.78944087 C13.3003556,6.61788486 13.0584616,6.53210685 12.7496608,6.53210685 L7.65444721,6.53210685 C7.33878414,6.53210685 7.09345904,6.61788486 6.91847191,6.78944087 C6.74348478,6.96099688 6.65599121,7.19774418 6.65599121,7.49968276 L6.65599121,12.6566565 C6.65599121,12.9585951 6.74348478,13.1953424 6.91847191,13.3668984 C7.09345904,13.5384544 7.33878414,13.6242324 7.65444721,13.6242324 Z' id='shape' fill='%23FF3B30' fill-rule='nonzero'%3E%3C/path%3E %3C/g%3E %3C/svg%3E");
656
+ height: 21px;
657
+ }
658
+
659
+ /* list */
660
+ ol:not(.options), ul:not(.options) {
661
+ padding-inline-start: 2em !important;
662
+ }
663
+
664
+ /* 亮色(默认) */
665
+ #chuanhu_chatbot {
666
+ background-color: var(--chatbot-background-color-light) !important;
667
+ color: var(--chatbot-color-light) !important;
668
+ }
669
+
670
+ [data-testid = "bot"] {
671
+ background: var(--message-bot-background-color-light) !important;
672
+ box-shadow: var(--cib-shadow-card) !important;
673
+ outline: transparent solid 1px !important;
674
+ }
675
+
676
+ [data-testid = "user"] {
677
+ background: linear-gradient(130deg, #2685b5 20%, #135a7f 77.5%) !important;
678
+ color: white !important;
679
+ box-shadow: var(--cib-shadow-card) !important;
680
+ outline: transparent solid 1px !important;
681
+ }
682
+
683
+ /* 暗色 */
684
+ .dark #chuanhu_chatbot {
685
+ background-color: var(--chatbot-background-color-dark) !important;
686
+ color: var(--chatbot-color-dark) !important;
687
+ }
688
+
689
+ .dark [data-testid = "bot"] {
690
+ background-color: var(--message-bot-background-color-dark) !important;
691
+ }
692
+
693
+ .dark [data-testid = "user"] {
694
+ background: linear-gradient(130deg, #2685b5 20%, #135a7f 77.5%) !important;
695
+ }
696
+
697
+ /* 屏幕宽度大于等于500px的设备 */
698
+ /* update on 2023.4.8: 高度的细致调整已写入JavaScript */
699
+ @media screen and (min-width: 500px) {
700
+ #chuanhu_chatbot {
701
+ height: calc(100vh - 200px);
702
+ }
703
+
704
+ #chuanhu_chatbot > .wrapper > .wrap {
705
+ max-height: calc(100vh - 200px - var(--line-sm) * 1rem - 2 * var(--block-label-margin));
706
+ }
707
+ }
708
+
709
+ /* 屏幕宽度小于500px的设备 */
710
+ @media screen and (max-width: 499px) {
711
+ #chuanhu_chatbot {
712
+ height: calc(100vh - 140px);
713
+ }
714
+
715
+ #chuanhu_chatbot > .wrapper > .wrap {
716
+ max-height: calc(100vh - 140px - var(--line-sm) * 1rem - 2 * var(--block-label-margin));
717
+ }
718
+
719
+ [data-testid = "bot"] {
720
+ max-width: 95% !important;
721
+ }
722
+
723
+ #app_title h1 {
724
+ letter-spacing: -1px;
725
+ font-size: 22px;
726
+ }
727
+ }
728
+
729
+ #chuanhu_chatbot > .wrapper > .wrap {
730
+ overflow-x: hidden;
731
+ }
732
+
733
+ /* 对话气泡 */
734
+ .message {
735
+ border-radius: var(--radius-xl) !important;
736
+ border: none;
737
+ padding: var(--spacing-xl) !important;
738
+ font-size: var(--message-font-size) !important;
739
+ line-height: var(--line-md) !important;
740
+ min-height: calc(var(--text-md) * var(--line-md) + 2 * var(--spacing-xl));
741
+ min-width: calc(var(--text-md) * var(--line-md) + 2 * var(--spacing-xl));
742
+ }
743
+
744
+ [data-testid = "bot"] {
745
+ max-width: 85%;
746
+ border-bottom-left-radius: 0 !important;
747
+ }
748
+
749
+ [data-testid = "user"] {
750
+ max-width: 85%;
751
+ width: auto !important;
752
+ border-bottom-right-radius: 0 !important;
753
+ }
754
+
755
+ .message.user p {
756
+ white-space: pre-wrap;
757
+ }
758
+
759
+ .message .user-message {
760
+ display: block;
761
+ padding: 0 !important;
762
+ white-space: pre-wrap;
763
+ }
764
+
765
+ .message .md-message p {
766
+ margin-top: 0.6em !important;
767
+ margin-bottom: 0.6em !important;
768
+ }
769
+
770
+ .message .md-message p:first-child {
771
+ margin-top: 0 !important;
772
+ }
773
+
774
+ .message .md-message p:last-of-type {
775
+ margin-bottom: 0 !important;
776
+ }
777
+
778
+ .message .md-message {
779
+ display: block;
780
+ padding: 0 !important;
781
+ }
782
+
783
+ .message .raw-message p {
784
+ margin: 0 !important;
785
+ }
786
+
787
+ .message .raw-message {
788
+ display: block;
789
+ padding: 0 !important;
790
+ white-space: pre-wrap;
791
+ }
792
+
793
+ .raw-message.hideM, .md-message.hideM {
794
+ display: none;
795
+ }
796
+
797
+ /* custom buttons */
798
+ .chuanhu-btn {
799
+ border-radius: 5px;
800
+ color: rgba(120, 120, 120, 0.64) !important;
801
+ padding: 4px !important;
802
+ position: absolute;
803
+ right: -22px;
804
+ cursor: pointer !important;
805
+ transition: color .2s ease, background-color .2s ease;
806
+ }
807
+
808
+ .chuanhu-btn:hover {
809
+ background-color: rgba(167, 167, 167, 0.25) !important;
810
+ color: unset !important;
811
+ }
812
+
813
+ .chuanhu-btn:active {
814
+ background-color: rgba(167, 167, 167, 0.5) !important;
815
+ }
816
+
817
+ .chuanhu-btn:focus {
818
+ outline: none;
819
+ }
820
+
821
+ .copy-bot-btn {
822
+ /* top: 18px; */
823
+ bottom: 0;
824
+ }
825
+
826
+ .toggle-md-btn {
827
+ /* top: 0; */
828
+ bottom: 20px;
829
+ }
830
+
831
+ .copy-code-btn {
832
+ position: relative;
833
+ float: right;
834
+ font-size: 1em;
835
+ cursor: pointer;
836
+ }
837
+
838
+ .message-wrap > div img {
839
+ border-radius: 10px !important;
840
+ }
841
+
842
+ /* history message */
843
+ .wrapper > .wrap > .history-message {
844
+ padding: 10px !important;
845
+ }
846
+
847
+ .history-message {
848
+ /* padding: 0 !important; */
849
+ opacity: 80%;
850
+ display: flex;
851
+ flex-direction: column;
852
+ }
853
+
854
+ .history-message > .history-message {
855
+ padding: 0 !important;
856
+ }
857
+
858
+ .history-message > .message-wrap {
859
+ padding: 0 !important;
860
+ margin-bottom: 16px;
861
+ }
862
+
863
+ .history-message > .message {
864
+ margin-bottom: 16px;
865
+ }
866
+
867
+ .wrapper > .wrap > .history-message::after {
868
+ content: "";
869
+ display: block;
870
+ height: 2px;
871
+ background-color: var(--body-text-color-subdued);
872
+ margin-bottom: 10px;
873
+ margin-top: -10px;
874
+ clear: both;
875
+ }
876
+
877
+ .wrapper > .wrap > .history-message > :last-child::after {
878
+ content: "仅供查看";
879
+ display: block;
880
+ text-align: center;
881
+ color: var(--body-text-color-subdued);
882
+ font-size: 0.8em;
883
+ }
884
+
885
+ /* 表格 */
886
+ table {
887
+ margin: 1em 0;
888
+ border-collapse: collapse;
889
+ empty-cells: show;
890
+ }
891
+
892
+ td, th {
893
+ border: 1.2px solid var(--border-color-primary) !important;
894
+ padding: 0.2em;
895
+ }
896
+
897
+ thead {
898
+ background-color: rgba(175, 184, 193, 0.2);
899
+ }
900
+
901
+ thead th {
902
+ padding: .5em .2em;
903
+ }
904
+
905
+ .message :not(pre) code {
906
+ display: inline;
907
+ white-space: break-spaces;
908
+ font-family: var(--font-mono);
909
+ border-radius: 6px;
910
+ margin: 0 2px 0 2px;
911
+ padding: .2em .4em .1em .4em;
912
+ background-color: rgba(175, 184, 193, 0.2);
913
+ }
914
+
915
+ /* 代码块 */
916
+ .message pre,
917
+ .message pre[class*=language-] {
918
+ color: #fff;
919
+ overflow-x: auto;
920
+ overflow-y: hidden;
921
+ margin: .8em 1em 1em 0em !important;
922
+ padding: var(--spacing-xl) 1.2em !important;
923
+ border-radius: var(--radius-lg) !important;
924
+ }
925
+
926
+ .message pre code,
927
+ .message pre code[class*=language-] {
928
+ color: #fff;
929
+ padding: 0;
930
+ margin: 0;
931
+ background-color: unset;
932
+ text-shadow: none;
933
+ font-family: var(--font-mono);
934
+ }
935
+
936
+ /* 覆盖 gradio 丑陋的复制按钮样式 */
937
+ pre button[title="copy"] {
938
+ border-radius: 5px;
939
+ transition: background-color .2s ease;
940
+ }
941
+
942
+ pre button[title="copy"]:hover {
943
+ background-color: #333232;
944
+ }
945
+
946
+ pre button .check {
947
+ color: #fff !important;
948
+ background: var(--neutral-950) !important;
949
+ }
950
+
951
+ /* 覆盖prism.css */
952
+ .language-css .token.string,
953
+ .style .token.string,
954
+ .token.entity,
955
+ .token.operator,
956
+ .token.url {
957
+ background: none !important;
958
+ }
959
+
960
+ .label.svelte-13hsdno.svelte-13hsdno.svelte-13hsdno {
961
+ display: none;
962
+ }
963
+
964
+ .gallery.svelte-13hsdno.svelte-13hsdno.svelte-13hsdno {
965
+ justify-content: flex-end;
966
+ }
967
+
968
+ .button-group {
969
+ width: 200px !important;
970
+ display: flex !important;
971
+ justify-content: space-between !important;
972
+ }
973
+
974
+ .chatbot {
975
+ background: none !important;
976
+ border: none !important;
977
+ }
978
+
979
+ button[class^="svelte-"], button[class*="svelte-"].selected {
980
+ width: calc(var(--size-full) / 3) !important; /* number of elements */
981
+ background: none !important;
982
+ }
983
+
984
+ /*#component-2 {*/
985
+ /* gap: 0 !important;*/
986
+ /* margin-bottom: 3px !important;*/
987
+ /*}*/
988
+
989
+ .token, .token > span {
990
+ text-overflow: ellipsis;
991
+ overflow: hidden;
992
+ }
993
+
994
+ .audio-btn > div[class^="svelte-"], .audio-btn > audio {
995
+ display: none;
996
+ }
997
+
998
+ div.logo {
999
+ background: url('https://i.ibb.co/BnmxGhz/logo.png') no-repeat left center;
1000
+ background-size: contain;
1001
+ height: 40px;
1002
+ align-items: flex-start;
1003
+ justify-content: flex-start;
1004
+ }
1005
+
1006
+ #component-29 {
1007
+ font-size: var(--message-font-size) !important;
1008
+ box-shadow: var(--cib-shadow-card) !important;
1009
+ outline: transparent solid 1px !important;
1010
+ }
1011
+
1012
+ #component-26 > div.gallery.svelte-13hsdno > button > div {
1013
+ background: var(--chatbot-background-color-light) !important;
1014
+ }
1015
+
1016
+ .dark #component-26 > div.gallery.svelte-13hsdno > button > div {
1017
+ background: var(--chatbot-background-color-dark) !important;
1018
+ }
1019
+
1020
+ .message.pending {
1021
+ background: none !important;
1022
+ }
1023
+
1024
+ #component-36 > label {
1025
+ width: inherit;
1026
+ }
custom_vectordb.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wrapper around Pinecone vector database."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import uuid
6
+ from typing import Any, Callable, Iterable, List, Optional, Tuple
7
+
8
+ import numpy as np
9
+
10
+ from langchain.docstore.document import Document
11
+ from langchain.embeddings.base import Embeddings
12
+ from langchain.vectorstores.base import VectorStore
13
+ from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class Pinecone(VectorStore):
19
+ """Wrapper around Pinecone vector database.
20
+
21
+ To use, you should have the ``pinecone-client`` python package installed.
22
+
23
+ Example:
24
+ .. code-block:: python
25
+
26
+ from langchain.vectorstores import Pinecone
27
+ from langchain.embeddings.openai import OpenAIEmbeddings
28
+ import pinecone
29
+
30
+ # The environment should be the one specified next to the API key
31
+ # in your Pinecone console
32
+ pinecone.init(api_key="***", environment="...")
33
+ index = pinecone.Index("langchain-demo")
34
+ embeddings = OpenAIEmbeddings()
35
+ vectorstore = Pinecone(index, embeddings.embed_query, "text")
36
+ """
37
+
38
+ def __init__(
39
+ self,
40
+ index: Any,
41
+ embedding_function: Callable,
42
+ text_key: str,
43
+ namespace: Optional[str] = None,
44
+ distance_strategy: Optional[DistanceStrategy] = DistanceStrategy.COSINE,
45
+ ):
46
+ """Initialize with Pinecone client."""
47
+ try:
48
+ import pinecone
49
+ except ImportError:
50
+ raise ValueError(
51
+ "Could not import pinecone python package. "
52
+ "Please install it with `pip install pinecone-client`."
53
+ )
54
+ if not isinstance(index, pinecone.index.Index):
55
+ raise ValueError(
56
+ f"client should be an instance of pinecone.index.Index, "
57
+ f"got {type(index)}"
58
+ )
59
+ self._index = index
60
+ self._embedding_function = embedding_function
61
+ self._text_key = text_key
62
+ self._namespace = namespace
63
+ self.distance_strategy = distance_strategy
64
+
65
+ @property
66
+ def embeddings(self) -> Optional[Embeddings]:
67
+ # TODO: Accept this object directly
68
+ return None
69
+
70
+ def add_texts(
71
+ self,
72
+ texts: Iterable[str],
73
+ metadatas: Optional[List[dict]] = None,
74
+ ids: Optional[List[str]] = None,
75
+ namespace: Optional[str] = None,
76
+ batch_size: int = 32,
77
+ **kwargs: Any,
78
+ ) -> List[str]:
79
+ """Run more texts through the embeddings and add to the vectorstore.
80
+
81
+ Args:
82
+ texts: Iterable of strings to add to the vectorstore.
83
+ metadatas: Optional list of metadatas associated with the texts.
84
+ ids: Optional list of ids to associate with the texts.
85
+ namespace: Optional pinecone namespace to add the texts to.
86
+
87
+ Returns:
88
+ List of ids from adding the texts into the vectorstore.
89
+
90
+ """
91
+ if namespace is None:
92
+ namespace = self._namespace
93
+ # Embed and create the documents
94
+ docs = []
95
+ ids = ids or [str(uuid.uuid4()) for _ in texts]
96
+ for i, text in enumerate(texts):
97
+ embedding = self._embedding_function(text)
98
+ metadata = metadatas[i] if metadatas else {}
99
+ metadata[self._text_key] = text
100
+ docs.append((ids[i], embedding, metadata))
101
+ # upsert to Pinecone
102
+ self._index.upsert(
103
+ vectors=docs, namespace=namespace, batch_size=batch_size, **kwargs
104
+ )
105
+ return ids
106
+
107
+ def similarity_search_with_relevance_scores(
108
+ self,
109
+ query: str,
110
+ k: int = 4,
111
+ **kwargs: Any,
112
+ ) -> List[Tuple[Document, float]]:
113
+ return [
114
+ a
115
+ for a in self.similarity_search_with_score(query, k=k)
116
+ if a[1] > kwargs["score_threshold"]
117
+ ]
118
+
119
+ def similarity_search_with_score(
120
+ self,
121
+ query: str,
122
+ k: int = 4,
123
+ filter: Optional[dict] = None,
124
+ namespace: Optional[str] = None,
125
+ ) -> List[Tuple[Document, float]]:
126
+ """Return pinecone documents most similar to query, along with scores.
127
+
128
+ Args:
129
+ query: Text to look up documents similar to.
130
+ k: Number of Documents to return. Defaults to 4.
131
+ filter: Dictionary of argument(s) to filter on metadata
132
+ namespace: Namespace to search in. Default will search in '' namespace.
133
+
134
+ Returns:
135
+ List of Documents most similar to the query and score for each
136
+ """
137
+ if namespace is None:
138
+ namespace = self._namespace
139
+ query_obj = self._embedding_function(query)
140
+ docs = []
141
+ results = self._index.query(
142
+ [query_obj],
143
+ top_k=k,
144
+ include_metadata=True,
145
+ namespace=namespace,
146
+ filter=filter,
147
+ )
148
+ for res in results["matches"]:
149
+ metadata = res["metadata"]
150
+ if self._text_key in metadata:
151
+ text = metadata.pop(self._text_key)
152
+ score = res["score"]
153
+ docs.append((Document(page_content=text, metadata=metadata), score))
154
+ else:
155
+ logger.warning(
156
+ f"Found document with no `{self._text_key}` key. Skipping."
157
+ )
158
+ return docs
159
+
160
+ def similarity_search(
161
+ self,
162
+ query: str,
163
+ k: int = 4,
164
+ filter: Optional[dict] = None,
165
+ namespace: Optional[str] = None,
166
+ **kwargs: Any,
167
+ ) -> List[Document]:
168
+ """Return pinecone documents most similar to query.
169
+
170
+ Args:
171
+ query: Text to look up documents similar to.
172
+ k: Number of Documents to return. Defaults to 4.
173
+ filter: Dictionary of argument(s) to filter on metadata
174
+ namespace: Namespace to search in. Default will search in '' namespace.
175
+
176
+ Returns:
177
+ List of Documents most similar to the query and score for each
178
+ """
179
+ docs_and_scores = self.similarity_search_with_score(
180
+ query, k=k, filter=filter, namespace=namespace, **kwargs
181
+ )
182
+ return [doc for doc, _ in docs_and_scores]
183
+
184
+ def _select_relevance_score_fn(self) -> Callable[[float], float]:
185
+ """
186
+ The 'correct' relevance function
187
+ may differ depending on a few things, including:
188
+ - the distance / similarity metric used by the VectorStore
189
+ - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
190
+ - embedding dimensionality
191
+ - etc.
192
+ """
193
+
194
+ if self.distance_strategy == DistanceStrategy.COSINE:
195
+ return self._cosine_relevance_score_fn
196
+ elif self.distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT:
197
+ return self._max_inner_product_relevance_score_fn
198
+ elif self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE:
199
+ return self._euclidean_relevance_score_fn
200
+ else:
201
+ raise ValueError(
202
+ "Unknown distance strategy, must be cosine, max_inner_product "
203
+ "(dot product), or euclidean"
204
+ )
205
+
206
+ def max_marginal_relevance_search_by_vector(
207
+ self,
208
+ embedding: List[float],
209
+ k: int = 4,
210
+ fetch_k: int = 20,
211
+ lambda_mult: float = 0.5,
212
+ filter: Optional[dict] = None,
213
+ namespace: Optional[str] = None,
214
+ **kwargs: Any,
215
+ ) -> List[Document]:
216
+ """Return docs selected using the maximal marginal relevance.
217
+
218
+ Maximal marginal relevance optimizes for similarity to query AND diversity
219
+ among selected documents.
220
+
221
+ Args:
222
+ embedding: Embedding to look up documents similar to.
223
+ k: Number of Documents to return. Defaults to 4.
224
+ fetch_k: Number of Documents to fetch to pass to MMR algorithm.
225
+ lambda_mult: Number between 0 and 1 that determines the degree
226
+ of diversity among the results with 0 corresponding
227
+ to maximum diversity and 1 to minimum diversity.
228
+ Defaults to 0.5.
229
+ Returns:
230
+ List of Documents selected by maximal marginal relevance.
231
+ """
232
+ if namespace is None:
233
+ namespace = self._namespace
234
+ results = self._index.query(
235
+ [embedding],
236
+ top_k=fetch_k,
237
+ include_values=True,
238
+ include_metadata=True,
239
+ namespace=namespace,
240
+ filter=filter,
241
+ )
242
+ mmr_selected = maximal_marginal_relevance(
243
+ np.array([embedding], dtype=np.float32),
244
+ [item["values"] for item in results["matches"]],
245
+ k=k,
246
+ lambda_mult=lambda_mult,
247
+ )
248
+ selected = [results["matches"][i]["metadata"] for i in mmr_selected]
249
+ return [
250
+ Document(page_content=metadata.pop((self._text_key)), metadata=metadata)
251
+ for metadata in selected
252
+ ]
253
+
254
+ def max_marginal_relevance_search(
255
+ self,
256
+ query: str,
257
+ k: int = 4,
258
+ fetch_k: int = 20,
259
+ lambda_mult: float = 0.5,
260
+ filter: Optional[dict] = None,
261
+ namespace: Optional[str] = None,
262
+ **kwargs: Any,
263
+ ) -> List[Document]:
264
+ """Return docs selected using the maximal marginal relevance.
265
+
266
+ Maximal marginal relevance optimizes for similarity to query AND diversity
267
+ among selected documents.
268
+
269
+ Args:
270
+ query: Text to look up documents similar to.
271
+ k: Number of Documents to return. Defaults to 4.
272
+ fetch_k: Number of Documents to fetch to pass to MMR algorithm.
273
+ lambda_mult: Number between 0 and 1 that determines the degree
274
+ of diversity among the results with 0 corresponding
275
+ to maximum diversity and 1 to minimum diversity.
276
+ Defaults to 0.5.
277
+ Returns:
278
+ List of Documents selected by maximal marginal relevance.
279
+ """
280
+ embedding = self._embedding_function(query)
281
+ return self.max_marginal_relevance_search_by_vector(
282
+ embedding, k, fetch_k, lambda_mult, filter, namespace
283
+ )
284
+
285
+ @classmethod
286
+ def from_texts(
287
+ cls,
288
+ texts: List[str],
289
+ embedding: Embeddings,
290
+ metadatas: Optional[List[dict]] = None,
291
+ ids: Optional[List[str]] = None,
292
+ batch_size: int = 32,
293
+ text_key: str = "text",
294
+ index_name: Optional[str] = None,
295
+ namespace: Optional[str] = None,
296
+ upsert_kwargs: Optional[dict] = None,
297
+ **kwargs: Any,
298
+ ) -> Pinecone:
299
+ """Construct Pinecone wrapper from raw documents.
300
+
301
+ This is a user friendly interface that:
302
+ 1. Embeds documents.
303
+ 2. Adds the documents to a provided Pinecone index
304
+
305
+ This is intended to be a quick way to get started.
306
+
307
+ Example:
308
+ .. code-block:: python
309
+
310
+ from langchain import Pinecone
311
+ from langchain.embeddings import OpenAIEmbeddings
312
+ import pinecone
313
+
314
+ # The environment should be the one specified next to the API key
315
+ # in your Pinecone console
316
+ pinecone.init(api_key="***", environment="...")
317
+ embeddings = OpenAIEmbeddings()
318
+ pinecone = Pinecone.from_texts(
319
+ texts,
320
+ embeddings,
321
+ index_name="langchain-demo"
322
+ )
323
+ """
324
+ try:
325
+ import pinecone
326
+ except ImportError:
327
+ raise ValueError(
328
+ "Could not import pinecone python package. "
329
+ "Please install it with `pip install pinecone-client`."
330
+ )
331
+
332
+ indexes = pinecone.list_indexes() # checks if provided index exists
333
+
334
+ if index_name in indexes:
335
+ index = pinecone.Index(index_name)
336
+ elif len(indexes) == 0:
337
+ raise ValueError(
338
+ "No active indexes found in your Pinecone project, "
339
+ "are you sure you're using the right API key and environment?"
340
+ )
341
+ else:
342
+ raise ValueError(
343
+ f"Index '{index_name}' not found in your Pinecone project. "
344
+ f"Did you mean one of the following indexes: {', '.join(indexes)}"
345
+ )
346
+ for i in range(0, len(texts), batch_size):
347
+ # set end position of batch
348
+ i_end = min(i + batch_size, len(texts))
349
+ # get batch of texts and ids
350
+ lines_batch = texts[i:i_end]
351
+ # create ids if not provided
352
+ if ids:
353
+ ids_batch = ids[i:i_end]
354
+ else:
355
+ ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
356
+ # create embeddings
357
+ embeds = embedding.embed_documents(lines_batch)
358
+ # prep metadata and upsert batch
359
+ if metadatas:
360
+ metadata = metadatas[i:i_end]
361
+ else:
362
+ metadata = [{} for _ in range(i, i_end)]
363
+ for j, line in enumerate(lines_batch):
364
+ metadata[j][text_key] = line
365
+ to_upsert = zip(ids_batch, embeds, metadata)
366
+ # upsert to Pinecone
367
+ _upsert_kwargs = upsert_kwargs or {}
368
+ index.upsert(vectors=list(to_upsert), namespace=namespace, **_upsert_kwargs)
369
+ return cls(index, embedding.embed_query, text_key, namespace, **kwargs)
370
+
371
+ @classmethod
372
+ def from_existing_index(
373
+ cls,
374
+ index_name: str,
375
+ embedding: Embeddings,
376
+ text_key: str = "text",
377
+ namespace: Optional[str] = None,
378
+ ) -> Pinecone:
379
+ """Load pinecone vectorstore from index name."""
380
+ try:
381
+ import pinecone
382
+ except ImportError:
383
+ raise ValueError(
384
+ "Could not import pinecone python package. "
385
+ "Please install it with `pip install pinecone-client`."
386
+ )
387
+
388
+ return cls(
389
+ pinecone.Index(index_name), embedding.embed_query, text_key, namespace
390
+ )
391
+
392
+ def delete(
393
+ self,
394
+ ids: Optional[List[str]] = None,
395
+ delete_all: Optional[bool] = None,
396
+ namespace: Optional[str] = None,
397
+ filter: Optional[dict] = None,
398
+ **kwargs: Any,
399
+ ) -> None:
400
+ """Delete by vector IDs or filter.
401
+ Args:
402
+ ids: List of ids to delete.
403
+ filter: Dictionary of conditions to filter vectors to delete.
404
+ """
405
+
406
+ if namespace is None:
407
+ namespace = self._namespace
408
+
409
+ if delete_all:
410
+ self._index.delete(delete_all=True, namespace=namespace, **kwargs)
411
+ elif ids is not None:
412
+ chunk_size = 1000
413
+ for i in range(0, len(ids), chunk_size):
414
+ chunk = ids[i : i + chunk_size]
415
+ self._index.delete(ids=chunk, namespace=namespace, **kwargs)
416
+ elif filter is not None:
417
+ self._index.delete(filter=filter, namespace=namespace, **kwargs)
418
+ else:
419
+ raise ValueError("Either ids, delete_all, or filter must be provided.")
420
+
421
+ return None
data.json CHANGED
The diff for this file is too large to render. See raw diff
 
geckodriver.log DELETED
The diff for this file is too large to render. See raw diff
 
history/binh/2023-08-06_17-10-17/Assistance Inquiry.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"history": [["Hello bot", "Hello! How can I assist you today?"]], "chatbot": [["Hello bot", "Hello! How can I assist you today?"]]}
html_parser.py DELETED
@@ -1,116 +0,0 @@
1
- """HTML parser.
2
-
3
- Contains parser for html files.
4
-
5
- """
6
- import re
7
- from pathlib import Path
8
- from typing import Dict, Union
9
- from abc import abstractmethod
10
- from pathlib import Path
11
- from typing import Dict, List, Optional, Union
12
-
13
-
14
- class BaseParser:
15
- """Base class for all parsers."""
16
-
17
- def __init__(self, parser_config: Optional[Dict] = None):
18
- """Init params."""
19
- self._parser_config = parser_config
20
-
21
- def init_parser(self) -> None:
22
- """Init parser and store it."""
23
- parser_config = self._init_parser()
24
- self._parser_config = parser_config
25
-
26
- @property
27
- def parser_config_set(self) -> bool:
28
- """Check if parser config is set."""
29
- return self._parser_config is not None
30
-
31
- @property
32
- def parser_config(self) -> Dict:
33
- """Check if parser config is set."""
34
- if self._parser_config is None:
35
- raise ValueError("Parser config not set.")
36
- return self._parser_config
37
-
38
- @abstractmethod
39
- def _init_parser(self) -> Dict:
40
- """Initialize the parser with the config."""
41
-
42
- @abstractmethod
43
- def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
44
- """Parse file."""
45
-
46
- class HTMLParser(BaseParser):
47
- """HTML parser."""
48
-
49
- def _init_parser(self) -> Dict:
50
- """Init parser."""
51
- return {}
52
-
53
- def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
54
- """Parse file.
55
-
56
- Returns:
57
- Union[str, List[str]]: a string or a List of strings.
58
- """
59
- try:
60
- from unstructured.partition.html import partition_html
61
- from unstructured.staging.base import convert_to_isd
62
- from unstructured.cleaners.core import clean
63
- except ImportError:
64
- raise ValueError("unstructured package is required to parse HTML files.")
65
-
66
- # Using the unstructured library to convert the html to isd format
67
- # isd sample : isd = [
68
- # {"text": "My Title", "type": "Title"},
69
- # {"text": "My Narrative", "type": "NarrativeText"}
70
- # ]
71
- with open(file, "r", encoding="utf-8") as fp:
72
- elements = partition_html(file=fp)
73
- isd = convert_to_isd(elements)
74
-
75
- # Removing non ascii charactwers from isd_el['text']
76
- for isd_el in isd:
77
- isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()
78
-
79
- # Removing all the \n characters from isd_el['text'] using regex and replace with single space
80
- # Removing all the extra spaces from isd_el['text'] using regex and replace with single space
81
- for isd_el in isd:
82
- isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
83
- isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
84
-
85
- # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
86
- for isd_el in isd:
87
- clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
88
-
89
- # Creating a list of all the indexes of isd_el['type'] = 'Title'
90
- title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
91
-
92
- # Creating 'Chunks' - List of lists of strings
93
- # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
94
- # Each Chunk can be thought of as an individual set of data, which can be sent to the model
95
- # Where Each Title is grouped together with the data under it
96
-
97
- Chunks = [[]]
98
- final_chunks = list(list())
99
-
100
- for i, isd_el in enumerate(isd):
101
- if i in title_indexes:
102
- Chunks.append([])
103
- Chunks[-1].append(isd_el['text'])
104
-
105
- # Removing all the chunks with sum of lenth of all the strings in the chunk < 25
106
- # TODO: This value can be an user defined variable
107
- for chunk in Chunks:
108
- # sum of lenth of all the strings in the chunk
109
- sum = 0
110
- sum += len(str(chunk))
111
- if sum < 25:
112
- Chunks.remove(chunk)
113
- else:
114
- # appending all the approved chunks to final_chunks as a single string
115
- final_chunks.append(" ".join([str(item) for item in chunk]))
116
- return final_chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logo.png ADDED
process_fb.py DELETED
@@ -1,55 +0,0 @@
1
- import json
2
- import ast
3
- import os
4
- import pinecone
5
-
6
- from pydantic import Field
7
- from vector_db import Document
8
- from html_parser import HTMLParser
9
- from langchain.vectorstores import Pinecone
10
- from config import PINECONE_API_KEY, PINECONE_ENVIRONMENT, INDEX_NAME
11
- from config import EMBEDDING_API_BASE, EMBEDDING_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION, EMBEDDING_DEPLOYMENT_ID
12
- from langchain.embeddings import OpenAIEmbeddings
13
-
14
-
15
- # initialize pinecone
16
- pinecone.init(
17
- api_key=PINECONE_API_KEY, # find at app.pinecone.io
18
- environment=PINECONE_ENVIRONMENT, # next to api key in console
19
- )
20
-
21
- # Azure embedding model definition
22
- embeddings = OpenAIEmbeddings(
23
- deployment=EMBEDDING_DEPLOYMENT_ID,
24
- openai_api_key=EMBEDDING_API_KEY,
25
- openai_api_base=EMBEDDING_API_BASE,
26
- openai_api_type=OPENAI_API_TYPE,
27
- openai_api_version=OPENAI_API_VERSION,
28
- chunk_size=16
29
- )
30
-
31
- if INDEX_NAME and INDEX_NAME not in pinecone.list_indexes():
32
- pinecone.create_index(
33
- INDEX_NAME,
34
- metric="cosine",
35
- dimension=1536
36
- )
37
- print(f"Index {INDEX_NAME} created successfully")
38
-
39
- index = pinecone.Index(INDEX_NAME)
40
-
41
- with open('data.json') as json_file:
42
- data = json.load(json_file)
43
- datas = ast.literal_eval(data)
44
-
45
- texts = []
46
- for k, v in datas.items():
47
- content = v["content"]
48
- post_url = v["post_url"]
49
- texts.append(Document(page_content=content, metadata={"source": post_url}))
50
-
51
- if len(texts)>0:
52
- Pinecone.from_documents(texts, embeddings, index_name=INDEX_NAME)
53
- message = f"Add files to {INDEX_NAME} sucessfully"
54
- print(message)
55
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
process_html.py DELETED
@@ -1,58 +0,0 @@
1
-
2
-
3
- import os
4
- import pinecone
5
-
6
- from pydantic import Field
7
- from vector_db import Document
8
- from html_parser import HTMLParser
9
- from langchain.vectorstores import Pinecone
10
- from config import PINECONE_API_KEY, PINECONE_ENVIRONMENT, INDEX_NAME
11
- from config import EMBEDDING_API_BASE, EMBEDDING_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION, EMBEDDING_DEPLOYMENT_ID
12
- from langchain.embeddings import OpenAIEmbeddings
13
-
14
- WEBSITE_FOLDER = 'website'
15
- parser = HTMLParser()
16
-
17
- # initialize pinecone
18
- pinecone.init(
19
- api_key=PINECONE_API_KEY, # find at app.pinecone.io
20
- environment=PINECONE_ENVIRONMENT, # next to api key in console
21
- )
22
-
23
- # Azure embedding model definition
24
- embeddings = OpenAIEmbeddings(
25
- deployment=EMBEDDING_DEPLOYMENT_ID,
26
- openai_api_key=EMBEDDING_API_KEY,
27
- openai_api_base=EMBEDDING_API_BASE,
28
- openai_api_type=OPENAI_API_TYPE,
29
- openai_api_version=OPENAI_API_VERSION,
30
- chunk_size=16
31
- )
32
-
33
- if INDEX_NAME and INDEX_NAME not in pinecone.list_indexes():
34
- pinecone.create_index(
35
- INDEX_NAME,
36
- metric="cosine",
37
- dimension=1536
38
- )
39
- print(f"Index {INDEX_NAME} created successfully")
40
-
41
- index = pinecone.Index(INDEX_NAME)
42
- index.delete(delete_all=True)
43
-
44
- files_src = os.listdir(WEBSITE_FOLDER)
45
- documents = []
46
- for file in files_src:
47
- filepath = os.path.join(WEBSITE_FOLDER, file)
48
- filename = os.path.basename(filepath)
49
- data = parser.parse_file(filepath)
50
- texts= []
51
- for d in data:
52
- texts.append(Document(page_content=d, metadata={"source": filepath}))
53
- documents.extend(texts)
54
- print(len(documents))
55
- if len(documents)>0:
56
- document_id = [d.metadata['document_id'] + f"_{idx}" for (idx, d) in enumerate(documents)]
57
- Pinecone.from_documents(documents, embeddings, ids=document_id, index_name=INDEX_NAME)
58
- message = f"Add website to {INDEX_NAME} sucessfully"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prompts/__pycache__/condense_llm.cpython-39.pyc CHANGED
Binary files a/prompts/__pycache__/condense_llm.cpython-39.pyc and b/prompts/__pycache__/condense_llm.cpython-39.pyc differ
 
prompts/__pycache__/create_topic.cpython-39.pyc ADDED
Binary file (603 Bytes). View file
 
prompts/__pycache__/custom_chain.cpython-39.pyc CHANGED
Binary files a/prompts/__pycache__/custom_chain.cpython-39.pyc and b/prompts/__pycache__/custom_chain.cpython-39.pyc differ
 
prompts/__pycache__/decision_maker.cpython-39.pyc ADDED
Binary file (794 Bytes). View file
 
prompts/__pycache__/llm.cpython-39.pyc CHANGED
Binary files a/prompts/__pycache__/llm.cpython-39.pyc and b/prompts/__pycache__/llm.cpython-39.pyc differ
 
prompts/__pycache__/multi_queries.cpython-39.pyc DELETED
Binary file (336 Bytes)
 
prompts/__pycache__/related_question.cpython-39.pyc ADDED
Binary file (770 Bytes). View file
 
prompts/__pycache__/simple_chain.cpython-39.pyc ADDED
Binary file (311 Bytes). View file
 
prompts/__pycache__/stage_analyzer.cpython-39.pyc DELETED
Binary file (3.6 kB)