Delete streamlit-demo
Browse files- streamlit-demo/.streamlit/secrets.toml +0 -6
- streamlit-demo/Dockerfile +0 -25
- streamlit-demo/README.md +0 -174
- streamlit-demo/__pycache__/brain.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/components_keys.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/explorer.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/files.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/question.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/stats.cpython-310.pyc +0 -0
- streamlit-demo/__pycache__/utils.cpython-310.pyc +0 -0
- streamlit-demo/app.py +0 -123
- streamlit-demo/brain.py +0 -39
- streamlit-demo/components_keys.py +0 -4
- streamlit-demo/explorer.py +0 -12
- streamlit-demo/files.py +0 -191
- streamlit-demo/loaders/__init__.py +0 -0
- streamlit-demo/loaders/__pycache__/__init__.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/audio.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/common.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/csv.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/docx.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/html.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/markdown.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/pdf.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/powerpoint.cpython-310.pyc +0 -0
- streamlit-demo/loaders/__pycache__/txt.cpython-310.pyc +0 -0
- streamlit-demo/loaders/audio.py +0 -65
- streamlit-demo/loaders/common.py +0 -42
- streamlit-demo/loaders/csv.py +0 -5
- streamlit-demo/loaders/docx.py +0 -5
- streamlit-demo/loaders/html.py +0 -47
- streamlit-demo/loaders/markdown.py +0 -5
- streamlit-demo/loaders/pdf.py +0 -6
- streamlit-demo/loaders/powerpoint.py +0 -5
- streamlit-demo/loaders/txt.py +0 -5
- streamlit-demo/question.py +0 -81
- streamlit-demo/requirements.txt +0 -14
- streamlit-demo/sidebar.py +0 -11
- streamlit-demo/stats.py +0 -31
- streamlit-demo/utils.py +0 -11
streamlit-demo/.streamlit/secrets.toml
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
supabase_url = "https://qlvpvyrbyynccpqyljoc.supabase.co"
|
| 2 |
-
supabase_service_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFsdnB2eXJieXluY2NwcXlsam9jIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTY4NDkxODY4NywiZXhwIjoyMDAwNDk0Njg3fQ.hTDr6FydSOdl0kyFzTiS6mEmkuYXugAAJy_R7eIQIl8"
|
| 3 |
-
openai_api_key = "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"
|
| 4 |
-
anthropic_api_key = ""
|
| 5 |
-
self_hosted = "true"
|
| 6 |
-
usage_limit = 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/Dockerfile
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
# app/Dockerfile
|
| 2 |
-
FROM python:3.11-slim
|
| 3 |
-
|
| 4 |
-
WORKDIR /app
|
| 5 |
-
|
| 6 |
-
RUN apt-get update && apt-get install -y \
|
| 7 |
-
build-essential \
|
| 8 |
-
curl \
|
| 9 |
-
software-properties-common \
|
| 10 |
-
git \
|
| 11 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
-
|
| 13 |
-
COPY . /app
|
| 14 |
-
|
| 15 |
-
## Mount .streamlit folder to load config.toml and secrets.toml
|
| 16 |
-
|
| 17 |
-
RUN pip3 install -r requirements.txt
|
| 18 |
-
|
| 19 |
-
EXPOSE 8501
|
| 20 |
-
|
| 21 |
-
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 22 |
-
|
| 23 |
-
VOLUME [ "/root/.streamlit" ]
|
| 24 |
-
|
| 25 |
-
ENTRYPOINT ["streamlit", "run", "main.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/README.md
DELETED
|
@@ -1,174 +0,0 @@
|
|
| 1 |
-
# Quivr
|
| 2 |
-
|
| 3 |
-
<p align="center">
|
| 4 |
-
<img src="../logo.png" alt="Quivr-logo" width="30%">
|
| 5 |
-
<p align="center">
|
| 6 |
-
|
| 7 |
-
<a href="https://discord.gg/HUpRgp2HG8">
|
| 8 |
-
<img src="https://img.shields.io/badge/discord-join%20chat-blue.svg" alt="Join our Discord" height="40">
|
| 9 |
-
</a>
|
| 10 |
-
|
| 11 |
-
Quivr is your second brain in the cloud, designed to easily store and retrieve unstructured information. It's like Obsidian but powered by generative AI.
|
| 12 |
-
|
| 13 |
-
## Features
|
| 14 |
-
|
| 15 |
-
- **Store Anything**: Quivr can handle almost any type of data you throw at it. Text, images, code snippets, you name it.
|
| 16 |
-
- **Generative AI**: Quivr uses advanced AI to help you generate and retrieve information.
|
| 17 |
-
- **Fast and Efficient**: Designed with speed and efficiency in mind. Quivr makes sure you can access your data as quickly as possible.
|
| 18 |
-
- **Secure**: Your data is stored securely in the cloud and is always under your control.
|
| 19 |
-
- **Compatible Files**:
|
| 20 |
-
- **Text**
|
| 21 |
-
- **Markdown**
|
| 22 |
-
- **PDF**
|
| 23 |
-
- **Audio**
|
| 24 |
-
- **Video**
|
| 25 |
-
- **Open Source**: Quivr is open source and free to use.
|
| 26 |
-
## Demo
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
### Demo with GPT3.5
|
| 30 |
-
https://github.com/StanGirard/quivr/assets/19614572/80721777-2313-468f-b75e-09379f694653
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
### Demo with Claude 100k context
|
| 34 |
-
https://github.com/StanGirard/quivr/assets/5101573/9dba918c-9032-4c8d-9eea-94336d2c8bd4
|
| 35 |
-
|
| 36 |
-
## Getting Started
|
| 37 |
-
|
| 38 |
-
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
|
| 39 |
-
|
| 40 |
-
### Prerequisites
|
| 41 |
-
|
| 42 |
-
Make sure you have the following installed before continuing:
|
| 43 |
-
|
| 44 |
-
- Python 3.10 or higher
|
| 45 |
-
- Pip
|
| 46 |
-
- Virtualenv
|
| 47 |
-
|
| 48 |
-
You'll also need a [Supabase](https://supabase.com/) account for:
|
| 49 |
-
|
| 50 |
-
- A new Supabase project
|
| 51 |
-
- Supabase Project API key
|
| 52 |
-
- Supabase Project URL
|
| 53 |
-
|
| 54 |
-
### Installing
|
| 55 |
-
|
| 56 |
-
- Clone the repository
|
| 57 |
-
|
| 58 |
-
```bash
|
| 59 |
-
git clone git@github.com:StanGirard/Quivr.git && cd Quivr
|
| 60 |
-
```
|
| 61 |
-
|
| 62 |
-
- Create a virtual environment
|
| 63 |
-
|
| 64 |
-
```bash
|
| 65 |
-
virtualenv venv
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
- Activate the virtual environment
|
| 69 |
-
|
| 70 |
-
```bash
|
| 71 |
-
source venv/bin/activate
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
- Install the dependencies
|
| 75 |
-
|
| 76 |
-
```bash
|
| 77 |
-
pip install -r requirements.txt
|
| 78 |
-
```
|
| 79 |
-
|
| 80 |
-
- Copy the streamlit secrets.toml example file
|
| 81 |
-
|
| 82 |
-
```bash
|
| 83 |
-
cp .streamlit/secrets.toml.example .streamlit/secrets.toml
|
| 84 |
-
```
|
| 85 |
-
|
| 86 |
-
- Add your credentials to .streamlit/secrets.toml file
|
| 87 |
-
|
| 88 |
-
```toml
|
| 89 |
-
supabase_url = "SUPABASE_URL"
|
| 90 |
-
supabase_service_key = "SUPABASE_SERVICE_KEY"
|
| 91 |
-
openai_api_key = "OPENAI_API_KEY"
|
| 92 |
-
anthropic_api_key = "ANTHROPIC_API_KEY" # Optional
|
| 93 |
-
```
|
| 94 |
-
|
| 95 |
-
_Note that the `supabase_service_key` is found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
|
| 96 |
-
|
| 97 |
-
- Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`)
|
| 98 |
-
|
| 99 |
-
```sql
|
| 100 |
-
-- Enable the pgvector extension to work with embedding vectors
|
| 101 |
-
create extension vector;
|
| 102 |
-
|
| 103 |
-
-- Create a table to store your documents
|
| 104 |
-
create table documents (
|
| 105 |
-
id bigserial primary key,
|
| 106 |
-
content text, -- corresponds to Document.pageContent
|
| 107 |
-
metadata jsonb, -- corresponds to Document.metadata
|
| 108 |
-
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
|
| 109 |
-
);
|
| 110 |
-
|
| 111 |
-
CREATE FUNCTION match_documents(query_embedding vector(1536), match_count int)
|
| 112 |
-
RETURNS TABLE(
|
| 113 |
-
id bigint,
|
| 114 |
-
content text,
|
| 115 |
-
metadata jsonb,
|
| 116 |
-
-- we return matched vectors to enable maximal marginal relevance searches
|
| 117 |
-
embedding vector(1536),
|
| 118 |
-
similarity float)
|
| 119 |
-
LANGUAGE plpgsql
|
| 120 |
-
AS $$
|
| 121 |
-
# variable_conflict use_column
|
| 122 |
-
BEGIN
|
| 123 |
-
RETURN query
|
| 124 |
-
SELECT
|
| 125 |
-
id,
|
| 126 |
-
content,
|
| 127 |
-
metadata,
|
| 128 |
-
embedding,
|
| 129 |
-
1 -(documents.embedding <=> query_embedding) AS similarity
|
| 130 |
-
FROM
|
| 131 |
-
documents
|
| 132 |
-
ORDER BY
|
| 133 |
-
documents.embedding <=> query_embedding
|
| 134 |
-
LIMIT match_count;
|
| 135 |
-
END;
|
| 136 |
-
$$;
|
| 137 |
-
```
|
| 138 |
-
|
| 139 |
-
and
|
| 140 |
-
|
| 141 |
-
```sql
|
| 142 |
-
create table
|
| 143 |
-
stats (
|
| 144 |
-
-- A column called "time" with data type "timestamp"
|
| 145 |
-
time timestamp,
|
| 146 |
-
-- A column called "details" with data type "text"
|
| 147 |
-
chat boolean,
|
| 148 |
-
embedding boolean,
|
| 149 |
-
details text,
|
| 150 |
-
metadata jsonb,
|
| 151 |
-
-- An "integer" primary key column called "id" that is generated always as identity
|
| 152 |
-
id integer primary key generated always as identity
|
| 153 |
-
);
|
| 154 |
-
```
|
| 155 |
-
|
| 156 |
-
- Run the app
|
| 157 |
-
|
| 158 |
-
```bash
|
| 159 |
-
streamlit run main.py
|
| 160 |
-
```
|
| 161 |
-
|
| 162 |
-
## Built With
|
| 163 |
-
|
| 164 |
-
* [NextJS](https://nextjs.org/) - The React framework used.
|
| 165 |
-
* [FastAPI](https://fastapi.tiangolo.com/) - The API framework used.
|
| 166 |
-
* [Supabase](https://supabase.io/) - The open source Firebase alternative.
|
| 167 |
-
|
| 168 |
-
## Contributing
|
| 169 |
-
|
| 170 |
-
Open a pull request and we'll review it as soon as possible.
|
| 171 |
-
|
| 172 |
-
## Star History
|
| 173 |
-
|
| 174 |
-
[](https://star-history.com/#StanGirard/quivr&Date)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/__pycache__/brain.cpython-310.pyc
DELETED
|
Binary file (1.82 kB)
|
|
|
streamlit-demo/__pycache__/components_keys.cpython-310.pyc
DELETED
|
Binary file (386 Bytes)
|
|
|
streamlit-demo/__pycache__/explorer.cpython-310.pyc
DELETED
|
Binary file (516 Bytes)
|
|
|
streamlit-demo/__pycache__/files.cpython-310.pyc
DELETED
|
Binary file (5.24 kB)
|
|
|
streamlit-demo/__pycache__/question.cpython-310.pyc
DELETED
|
Binary file (2.56 kB)
|
|
|
streamlit-demo/__pycache__/stats.cpython-310.pyc
DELETED
|
Binary file (736 Bytes)
|
|
|
streamlit-demo/__pycache__/utils.cpython-310.pyc
DELETED
|
Binary file (565 Bytes)
|
|
|
streamlit-demo/app.py
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
# main.py
|
| 2 |
-
import os
|
| 3 |
-
import tempfile
|
| 4 |
-
|
| 5 |
-
import streamlit as st
|
| 6 |
-
from files import file_uploader, url_uploader
|
| 7 |
-
from question import chat_with_doc
|
| 8 |
-
from brain import brain
|
| 9 |
-
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 10 |
-
from langchain.vectorstores import SupabaseVectorStore
|
| 11 |
-
from supabase import Client, create_client
|
| 12 |
-
from explorer import view_document
|
| 13 |
-
from stats import get_usage_today
|
| 14 |
-
|
| 15 |
-
supabase_url = st.secrets.supabase_url
|
| 16 |
-
supabase_key = st.secrets.supabase_service_key
|
| 17 |
-
openai_api_key = st.secrets.openai_api_key
|
| 18 |
-
anthropic_api_key = st.secrets.anthropic_api_key
|
| 19 |
-
supabase: Client = create_client(supabase_url, supabase_key)
|
| 20 |
-
self_hosted = st.secrets.self_hosted
|
| 21 |
-
|
| 22 |
-
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
|
| 23 |
-
vector_store = SupabaseVectorStore(
|
| 24 |
-
supabase, embeddings, table_name="documents")
|
| 25 |
-
models = ["gpt-3.5-turbo", "gpt-4"]
|
| 26 |
-
if anthropic_api_key:
|
| 27 |
-
models += ["claude-v1", "claude-v1.3",
|
| 28 |
-
"claude-instant-v1-100k", "claude-instant-v1.1-100k"]
|
| 29 |
-
|
| 30 |
-
# Set the theme
|
| 31 |
-
st.set_page_config(
|
| 32 |
-
page_title="KPMG GPT",
|
| 33 |
-
layout="wide",
|
| 34 |
-
initial_sidebar_state="expanded",
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
st.title("KPMG GPT")
|
| 39 |
-
st.markdown("")
|
| 40 |
-
if self_hosted == "false":
|
| 41 |
-
st.markdown('**📢 Note: In the public demo, access to functionality is restricted. You can only use the GPT-3.5-turbo model and upload files up to 1Mb. To use more models and upload larger files, consider self-hosting Quivr.**')
|
| 42 |
-
|
| 43 |
-
st.markdown("---\n\n")
|
| 44 |
-
|
| 45 |
-
st.session_state["overused"] = False
|
| 46 |
-
if self_hosted == "false":
|
| 47 |
-
usage = get_usage_today(supabase)
|
| 48 |
-
if usage > st.secrets.usage_limit:
|
| 49 |
-
st.markdown(
|
| 50 |
-
f"<span style='color:red'>You have used {usage} tokens today, which is more than your daily limit of {st.secrets.usage_limit} tokens. Please come back later or consider self-hosting.</span>", unsafe_allow_html=True)
|
| 51 |
-
st.session_state["overused"] = True
|
| 52 |
-
else:
|
| 53 |
-
st.markdown(f"<span style='color:blue'>Usage today: {usage} tokens out of {st.secrets.usage_limit}</span>", unsafe_allow_html=True)
|
| 54 |
-
st.write("---")
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
# Initialize session state variables
|
| 60 |
-
if 'model' not in st.session_state:
|
| 61 |
-
st.session_state['model'] = "gpt-3.5-turbo"
|
| 62 |
-
if 'temperature' not in st.session_state:
|
| 63 |
-
st.session_state['temperature'] = 0.0
|
| 64 |
-
if 'chunk_size' not in st.session_state:
|
| 65 |
-
st.session_state['chunk_size'] = 500
|
| 66 |
-
if 'chunk_overlap' not in st.session_state:
|
| 67 |
-
st.session_state['chunk_overlap'] = 0
|
| 68 |
-
if 'max_tokens' not in st.session_state:
|
| 69 |
-
st.session_state['max_tokens'] = 256
|
| 70 |
-
|
| 71 |
-
# Create a radio button for user to choose between adding knowledge or asking a question
|
| 72 |
-
user_choice = st.radio(
|
| 73 |
-
"Choose an action", ('Add Knowledge', 'Chat with your Brain', 'Forget', "Explore"))
|
| 74 |
-
|
| 75 |
-
st.markdown("---\n\n")
|
| 76 |
-
|
| 77 |
-
if user_choice == 'Add Knowledge':
|
| 78 |
-
# Display chunk size and overlap selection only when adding knowledge
|
| 79 |
-
st.sidebar.title("Configuration")
|
| 80 |
-
st.sidebar.markdown(
|
| 81 |
-
"Choose your chunk size and overlap for adding knowledge.")
|
| 82 |
-
st.session_state['chunk_size'] = st.sidebar.slider(
|
| 83 |
-
"Select Chunk Size", 100, 2000, st.session_state['chunk_size'], 50)
|
| 84 |
-
st.session_state['chunk_overlap'] = st.sidebar.slider(
|
| 85 |
-
"Select Chunk Overlap", 0, 200, st.session_state['chunk_overlap'], 10)
|
| 86 |
-
|
| 87 |
-
# Create two columns for the file uploader and URL uploader
|
| 88 |
-
col1, col2 = st.columns(2)
|
| 89 |
-
|
| 90 |
-
with col1:
|
| 91 |
-
file_uploader(supabase, vector_store)
|
| 92 |
-
with col2:
|
| 93 |
-
url_uploader(supabase, vector_store)
|
| 94 |
-
elif user_choice == 'Chat with your Brain':
|
| 95 |
-
# Display model and temperature selection only when asking questions
|
| 96 |
-
st.sidebar.title("Configuration")
|
| 97 |
-
st.sidebar.markdown(
|
| 98 |
-
"Choose your model and temperature for asking questions.")
|
| 99 |
-
if self_hosted != "false":
|
| 100 |
-
st.session_state['model'] = st.sidebar.selectbox(
|
| 101 |
-
"Select Model", models, index=(models).index(st.session_state['model']))
|
| 102 |
-
else:
|
| 103 |
-
st.sidebar.write("**Model**: gpt-3.5-turbo")
|
| 104 |
-
st.sidebar.write("**Self Host to unlock more models such as claude-v1 and GPT4**")
|
| 105 |
-
st.session_state['model'] = "gpt-3.5-turbo"
|
| 106 |
-
st.session_state['temperature'] = st.sidebar.slider(
|
| 107 |
-
"Select Temperature", 0.0, 1.0, st.session_state['temperature'], 0.1)
|
| 108 |
-
if st.secrets.self_hosted != "false":
|
| 109 |
-
st.session_state['max_tokens'] = st.sidebar.slider(
|
| 110 |
-
"Select Max Tokens", 256, 2048, st.session_state['max_tokens'], 2048)
|
| 111 |
-
else:
|
| 112 |
-
st.session_state['max_tokens'] = 500
|
| 113 |
-
|
| 114 |
-
chat_with_doc(st.session_state['model'], vector_store, stats_db=supabase)
|
| 115 |
-
elif user_choice == 'Forget':
|
| 116 |
-
st.sidebar.title("Configuration")
|
| 117 |
-
|
| 118 |
-
brain(supabase)
|
| 119 |
-
elif user_choice == 'Explore':
|
| 120 |
-
st.sidebar.title("Configuration")
|
| 121 |
-
view_document(supabase)
|
| 122 |
-
|
| 123 |
-
st.markdown("---\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/brain.py
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
def brain(supabase):
|
| 5 |
-
## List all documents
|
| 6 |
-
response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
|
| 7 |
-
|
| 8 |
-
documents = response.data # Access the data from the response
|
| 9 |
-
|
| 10 |
-
# Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
|
| 11 |
-
unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
| 12 |
-
|
| 13 |
-
# Sort the list of documents by size in decreasing order
|
| 14 |
-
unique_data.sort(key=lambda x: int(x['size']), reverse=True)
|
| 15 |
-
|
| 16 |
-
# Display some metrics at the top of the page
|
| 17 |
-
col1, col2 = st.columns(2)
|
| 18 |
-
col1.metric(label="Total Documents", value=len(unique_data))
|
| 19 |
-
col2.metric(label="Total Size (bytes)", value=sum(int(doc['size']) for doc in unique_data))
|
| 20 |
-
|
| 21 |
-
for document in unique_data:
|
| 22 |
-
# Create a unique key for each button by using the document name
|
| 23 |
-
button_key = f"delete_{document['name']}"
|
| 24 |
-
|
| 25 |
-
# Display the document name, size and the delete button on the same line
|
| 26 |
-
col1, col2, col3 = st.columns([3, 1, 1])
|
| 27 |
-
col1.markdown(f"**{document['name']}** ({document['size']} bytes)")
|
| 28 |
-
|
| 29 |
-
if col2.button('❌', key=button_key):
|
| 30 |
-
delete_document(supabase, document['name'])
|
| 31 |
-
|
| 32 |
-
def delete_document(supabase, document_name):
|
| 33 |
-
# Delete the document from the database
|
| 34 |
-
response = supabase.table("documents").delete().match({"metadata->>file_name": document_name}).execute()
|
| 35 |
-
# Check if the deletion was successful
|
| 36 |
-
if len(response.data) > 0:
|
| 37 |
-
st.write(f"✂️ {document_name} was deleted.")
|
| 38 |
-
else:
|
| 39 |
-
st.write(f"❌ {document_name} was not deleted.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/components_keys.py
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
"""Store streamlit component keys"""
|
| 2 |
-
|
| 3 |
-
class ComponentsKeys:
|
| 4 |
-
FILE_UPLOADER = "file_uploader"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/explorer.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def view_document(supabase):
|
| 5 |
-
# Get the document from the database
|
| 6 |
-
response = supabase.table("documents").select("content").execute()
|
| 7 |
-
st.write("**This feature is in active development**")
|
| 8 |
-
# Display a list of elements from the documents
|
| 9 |
-
# If the user clicks on an element, display the content of the document
|
| 10 |
-
for document in response.data:
|
| 11 |
-
if st.button(document['content'][:50].replace("\n", " ")):
|
| 12 |
-
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/files.py
DELETED
|
@@ -1,191 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from typing import (
|
| 3 |
-
Any,
|
| 4 |
-
Union,
|
| 5 |
-
)
|
| 6 |
-
import zipfile
|
| 7 |
-
import streamlit as st
|
| 8 |
-
from streamlit.runtime.uploaded_file_manager import (
|
| 9 |
-
UploadedFile,
|
| 10 |
-
UploadedFileRec,
|
| 11 |
-
UploadedFileManager,
|
| 12 |
-
)
|
| 13 |
-
from streamlit.runtime.scriptrunner import get_script_run_ctx
|
| 14 |
-
from supabase.client import Client
|
| 15 |
-
from langchain.vectorstores.supabase import SupabaseVectorStore
|
| 16 |
-
from components_keys import ComponentsKeys
|
| 17 |
-
from loaders.audio import process_audio
|
| 18 |
-
from loaders.txt import process_txt
|
| 19 |
-
from loaders.csv import process_csv
|
| 20 |
-
from loaders.markdown import process_markdown
|
| 21 |
-
from loaders.pdf import process_pdf
|
| 22 |
-
from loaders.html import (
|
| 23 |
-
create_html_file,
|
| 24 |
-
delete_tempfile,
|
| 25 |
-
get_html,
|
| 26 |
-
process_html,
|
| 27 |
-
)
|
| 28 |
-
from loaders.powerpoint import process_powerpoint
|
| 29 |
-
from loaders.docx import process_docx
|
| 30 |
-
from utils import compute_sha1_from_content
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
ctx = get_script_run_ctx()
|
| 34 |
-
manager = UploadedFileManager()
|
| 35 |
-
file_processors = {
|
| 36 |
-
".txt": process_txt,
|
| 37 |
-
".csv": process_csv,
|
| 38 |
-
".md": process_markdown,
|
| 39 |
-
".markdown": process_markdown,
|
| 40 |
-
".m4a": process_audio,
|
| 41 |
-
".mp3": process_audio,
|
| 42 |
-
".webm": process_audio,
|
| 43 |
-
".mp4": process_audio,
|
| 44 |
-
".mpga": process_audio,
|
| 45 |
-
".wav": process_audio,
|
| 46 |
-
".mpeg": process_audio,
|
| 47 |
-
".pdf": process_pdf,
|
| 48 |
-
".html": process_html,
|
| 49 |
-
".pptx": process_powerpoint,
|
| 50 |
-
".docx": process_docx
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
def file_uploader(supabase, vector_store):
|
| 54 |
-
# Omit zip file support if the `st.secrets.self_hosted` != "true" because
|
| 55 |
-
# a zip file can consist of multiple files so the limit on 1 file uploaded
|
| 56 |
-
# at a time in the demo can be circumvented.
|
| 57 |
-
accepted_file_extensions = list(file_processors.keys())
|
| 58 |
-
accept_multiple_files = st.secrets.self_hosted == "true"
|
| 59 |
-
if accept_multiple_files:
|
| 60 |
-
accepted_file_extensions += [".zip"]
|
| 61 |
-
|
| 62 |
-
files = st.file_uploader(
|
| 63 |
-
"**Upload a file**",
|
| 64 |
-
accept_multiple_files=accept_multiple_files,
|
| 65 |
-
type=accepted_file_extensions,
|
| 66 |
-
key=ComponentsKeys.FILE_UPLOADER,
|
| 67 |
-
)
|
| 68 |
-
if st.secrets.self_hosted == "false":
|
| 69 |
-
st.markdown("**In demo mode, the max file size is 1MB**")
|
| 70 |
-
if st.button("Add to Database"):
|
| 71 |
-
# Single file upload
|
| 72 |
-
if isinstance(files, UploadedFile):
|
| 73 |
-
filter_file(files, supabase, vector_store)
|
| 74 |
-
# Multiple files upload
|
| 75 |
-
elif isinstance(files, list):
|
| 76 |
-
for file in files:
|
| 77 |
-
filter_file(file, supabase, vector_store)
|
| 78 |
-
|
| 79 |
-
def file_already_exists(supabase, file):
|
| 80 |
-
file_sha1 = compute_sha1_from_content(file.getvalue())
|
| 81 |
-
response = supabase.table("documents").select("id").eq("metadata->>file_sha1", file_sha1).execute()
|
| 82 |
-
return len(response.data) > 0
|
| 83 |
-
|
| 84 |
-
def file_to_uploaded_file(file: Any) -> Union[None, UploadedFile]:
|
| 85 |
-
"""Convert a file to a streamlit `UploadedFile` object.
|
| 86 |
-
|
| 87 |
-
This allows us to unzip files and treat them the same way
|
| 88 |
-
streamlit treats files uploaded through the file uploader.
|
| 89 |
-
|
| 90 |
-
Parameters
|
| 91 |
-
---------
|
| 92 |
-
file : Any
|
| 93 |
-
The file. Can be any file supported by this app.
|
| 94 |
-
|
| 95 |
-
Returns
|
| 96 |
-
-------
|
| 97 |
-
Union[None, UploadedFile]
|
| 98 |
-
The file converted to a streamlit `UploadedFile` object.
|
| 99 |
-
Returns `None` if the script context cannot be grabbed.
|
| 100 |
-
"""
|
| 101 |
-
|
| 102 |
-
if ctx is None:
|
| 103 |
-
print("script context not found, skipping uploading file:", file.name)
|
| 104 |
-
return
|
| 105 |
-
|
| 106 |
-
file_extension = os.path.splitext(file.name)[-1]
|
| 107 |
-
file_name = file.name
|
| 108 |
-
file_data = file.read()
|
| 109 |
-
# The file manager will automatically assign an ID so pass `None`
|
| 110 |
-
# Reference: https://github.com/streamlit/streamlit/blob/9a6ce804b7977bdc1f18906d1672c45f9a9b3398/lib/streamlit/runtime/uploaded_file_manager.py#LL98C6-L98C6
|
| 111 |
-
uploaded_file_rec = UploadedFileRec(None, file_name, file_extension, file_data)
|
| 112 |
-
uploaded_file_rec = manager.add_file(
|
| 113 |
-
ctx.session_id,
|
| 114 |
-
ComponentsKeys.FILE_UPLOADER,
|
| 115 |
-
uploaded_file_rec,
|
| 116 |
-
)
|
| 117 |
-
return UploadedFile(uploaded_file_rec)
|
| 118 |
-
|
| 119 |
-
def filter_zip_file(
|
| 120 |
-
file: UploadedFile,
|
| 121 |
-
supabase: Client,
|
| 122 |
-
vector_store: SupabaseVectorStore,
|
| 123 |
-
) -> None:
|
| 124 |
-
"""Unzip the zip file then filter each unzipped file.
|
| 125 |
-
|
| 126 |
-
Parameters
|
| 127 |
-
----------
|
| 128 |
-
file : UploadedFile
|
| 129 |
-
The uploaded file from the file uploader.
|
| 130 |
-
supabase : Client
|
| 131 |
-
The supabase client.
|
| 132 |
-
vector_store : SupabaseVectorStore
|
| 133 |
-
The vector store in the database.
|
| 134 |
-
"""
|
| 135 |
-
|
| 136 |
-
with zipfile.ZipFile(file, "r") as z:
|
| 137 |
-
unzipped_files = z.namelist()
|
| 138 |
-
for unzipped_file in unzipped_files:
|
| 139 |
-
with z.open(unzipped_file, "r") as f:
|
| 140 |
-
filter_file(f, supabase, vector_store)
|
| 141 |
-
|
| 142 |
-
def filter_file(file, supabase, vector_store):
|
| 143 |
-
# Streamlit file uploads are of type `UploadedFile` which has the
|
| 144 |
-
# necessary methods and attributes for this app to work.
|
| 145 |
-
if not isinstance(file, UploadedFile):
|
| 146 |
-
file = file_to_uploaded_file(file)
|
| 147 |
-
|
| 148 |
-
file_extension = os.path.splitext(file.name)[-1]
|
| 149 |
-
if file_extension == ".zip":
|
| 150 |
-
filter_zip_file(file, supabase, vector_store)
|
| 151 |
-
return True
|
| 152 |
-
|
| 153 |
-
if file_already_exists(supabase, file):
|
| 154 |
-
st.write(f"😎 {file.name} is already in the database.")
|
| 155 |
-
return False
|
| 156 |
-
|
| 157 |
-
if file.size < 1:
|
| 158 |
-
st.write(f"💨 {file.name} is empty.")
|
| 159 |
-
return False
|
| 160 |
-
|
| 161 |
-
if file_extension in file_processors:
|
| 162 |
-
if st.secrets.self_hosted == "false":
|
| 163 |
-
file_processors[file_extension](vector_store, file, stats_db=supabase)
|
| 164 |
-
else:
|
| 165 |
-
file_processors[file_extension](vector_store, file, stats_db=None)
|
| 166 |
-
st.write(f"✅ {file.name} ")
|
| 167 |
-
return True
|
| 168 |
-
|
| 169 |
-
st.write(f"❌ {file.name} is not a valid file type.")
|
| 170 |
-
return False
|
| 171 |
-
|
| 172 |
-
def url_uploader(supabase, vector_store):
|
| 173 |
-
url = st.text_area("**Add an url**",placeholder="")
|
| 174 |
-
button = st.button("Add the URL to the database")
|
| 175 |
-
|
| 176 |
-
if button:
|
| 177 |
-
if not st.session_state["overused"]:
|
| 178 |
-
html = get_html(url)
|
| 179 |
-
if html:
|
| 180 |
-
st.write(f"Getting content ... {url} ")
|
| 181 |
-
try:
|
| 182 |
-
file, temp_file_path = create_html_file(url, html)
|
| 183 |
-
except UnicodeEncodeError as e:
|
| 184 |
-
st.write(f"❌ Error encoding character: {e}")
|
| 185 |
-
file, temp_file_path = create_html_file(url, html)
|
| 186 |
-
ret = filter_file(file, supabase, vector_store)
|
| 187 |
-
delete_tempfile(temp_file_path, url, ret)
|
| 188 |
-
else:
|
| 189 |
-
st.write(f"❌ Failed to access to {url} .")
|
| 190 |
-
else:
|
| 191 |
-
st.write("You have reached your daily limit. Please come back later or self host the solution.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/__init__.py
DELETED
|
File without changes
|
streamlit-demo/loaders/__pycache__/__init__.cpython-310.pyc
DELETED
|
Binary file (148 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/audio.cpython-310.pyc
DELETED
|
Binary file (2.39 kB)
|
|
|
streamlit-demo/loaders/__pycache__/common.cpython-310.pyc
DELETED
|
Binary file (1.7 kB)
|
|
|
streamlit-demo/loaders/__pycache__/csv.cpython-310.pyc
DELETED
|
Binary file (429 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/docx.cpython-310.pyc
DELETED
|
Binary file (426 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/html.cpython-310.pyc
DELETED
|
Binary file (1.97 kB)
|
|
|
streamlit-demo/loaders/__pycache__/markdown.cpython-310.pyc
DELETED
|
Binary file (444 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/pdf.cpython-310.pyc
DELETED
|
Binary file (420 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/powerpoint.cpython-310.pyc
DELETED
|
Binary file (452 Bytes)
|
|
|
streamlit-demo/loaders/__pycache__/txt.cpython-310.pyc
DELETED
|
Binary file (419 Bytes)
|
|
|
streamlit-demo/loaders/audio.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import tempfile
|
| 3 |
-
from io import BytesIO
|
| 4 |
-
import time
|
| 5 |
-
import openai
|
| 6 |
-
import streamlit as st
|
| 7 |
-
from langchain.document_loaders import TextLoader
|
| 8 |
-
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 9 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 10 |
-
from utils import compute_sha1_from_content
|
| 11 |
-
from langchain.schema import Document
|
| 12 |
-
from stats import add_usage
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def _transcribe_audio(api_key, audio_file, stats_db):
    """Transcribe an uploaded audio file via the OpenAI Whisper API.

    The upload is buffered into a temporary file (Whisper needs a real file
    with the right extension) and the translation result object is returned.
    """
    openai.api_key = api_key
    transcript = ""

    with BytesIO(audio_file.read()) as audio_bytes:
        # Keep the original extension so the API recognizes the format.
        file_extension = os.path.splitext(audio_file.name)[-1]

        with tempfile.NamedTemporaryFile(delete=True, suffix=file_extension) as temp_audio_file:
            temp_audio_file.write(audio_bytes.read())
            # Rewind so the API call reads from the start of the file.
            temp_audio_file.seek(0)

            if st.secrets.self_hosted == "false":
                # Hosted deployments track usage per transcription.
                add_usage(stats_db, "embedding", "audio", metadata={"file_name": audio_file.name,"file_type": file_extension})

            transcript = openai.Audio.translate("whisper-1", temp_audio_file)

    return transcript
|
| 37 |
-
|
| 38 |
-
def process_audio(vector_store, file_name, stats_db):
    """Transcribe an uploaded audio file and embed the transcript.

    `file_name` is the Streamlit UploadedFile. The Whisper transcript is
    chunked with the session's splitter settings, tagged with provenance
    metadata, and added to *vector_store*, which is returned.
    """
    if st.secrets.self_hosted == "false":
        if file_name.size > 10000000:
            # Fix: the enforced limit is 10 MB, but the old message said "1MB".
            st.error("File size is too large. Please upload a file smaller than 10MB.")
            return
    dateshort = time.strftime("%Y%m%d-%H%M%S")
    file_meta_name = f"audiotranscript_{dateshort}.txt"
    openai_api_key = st.secrets["openai_api_key"]
    transcript = _transcribe_audio(openai_api_key, file_name, stats_db)

    # Encode once; the sha1 and the "file size" are both derived from the
    # transcript text, not from the original audio upload.
    transcript_bytes = transcript.text.encode("utf-8")
    file_sha = compute_sha1_from_content(transcript_bytes)
    file_size = len(transcript_bytes)

    # Chunking parameters come from the sidebar via session state.
    chunk_size = st.session_state['chunk_size']
    chunk_overlap = st.session_state['chunk_overlap']

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_text(transcript.text)

    docs_with_metadata = [Document(page_content=text, metadata={"file_sha1": file_sha,"file_size": file_size, "file_name": file_meta_name, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort}) for text in texts]

    if st.secrets.self_hosted == "false":
        add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
    vector_store.add_documents(docs_with_metadata)
    return vector_store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/common.py
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
import tempfile
|
| 2 |
-
import time
|
| 3 |
-
import os
|
| 4 |
-
from utils import compute_sha1_from_file
|
| 5 |
-
from langchain.schema import Document
|
| 6 |
-
import streamlit as st
|
| 7 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
-
from stats import add_usage
|
| 9 |
-
|
| 10 |
-
def process_file(vector_store, file, loader_class, file_suffix, stats_db=None):
    """Parse an uploaded file and embed its chunks into the vector store.

    The upload is written to a temporary file, loaded with *loader_class*,
    split according to the session's chunk settings, tagged with provenance
    metadata, and added to *vector_store*. Usage is logged when *stats_db*
    is provided.
    """
    file_name = file.name
    file_size = file.size
    if st.secrets.self_hosted == "false":
        if file_size > 1000000:
            st.error("File size is too large. Please upload a file smaller than 1MB or self host.")
            return

    dateshort = time.strftime("%Y%m%d")
    with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp_file:
        tmp_file.write(file.getvalue())
        tmp_file.flush()

        # Loaders expect a path on disk, hence the temporary file.
        documents = loader_class(tmp_file.name).load()
        file_sha1 = compute_sha1_from_file(tmp_file.name)

    os.remove(tmp_file.name)

    chunk_size = st.session_state['chunk_size']
    chunk_overlap = st.session_state['chunk_overlap']

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(documents)

    # Re-wrap every chunk so each one carries the same provenance metadata.
    docs_with_metadata = [
        Document(
            page_content=doc.page_content,
            metadata={
                "file_sha1": file_sha1,
                "file_size": file_size,
                "file_name": file_name,
                "chunk_size": chunk_size,
                "chunk_overlap": chunk_overlap,
                "date": dateshort,
            },
        )
        for doc in chunks
    ]

    vector_store.add_documents(docs_with_metadata)
    if stats_db:
        add_usage(stats_db, "embedding", "file", metadata={"file_name": file_name,"file_type": file_suffix, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/csv.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders.csv_loader import CSVLoader
|
| 3 |
-
|
| 4 |
-
def process_csv(vector_store, file,stats_db):
    """Chunk and embed an uploaded CSV file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, CSVLoader, file_suffix=".csv", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/docx.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import Docx2txtLoader
|
| 3 |
-
|
| 4 |
-
def process_docx(vector_store, file, stats_db):
    """Chunk and embed an uploaded Word (.docx) file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, Docx2txtLoader, file_suffix=".docx", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/html.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import UnstructuredHTMLLoader
|
| 3 |
-
import requests
|
| 4 |
-
import re
|
| 5 |
-
import unicodedata
|
| 6 |
-
import tempfile
|
| 7 |
-
import os
|
| 8 |
-
import streamlit as st
|
| 9 |
-
from streamlit.runtime.uploaded_file_manager import UploadedFileRec, UploadedFile
|
| 10 |
-
|
| 11 |
-
def process_html(vector_store, file, stats_db):
    """Chunk and embed an uploaded HTML file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, UnstructuredHTMLLoader, file_suffix=".html", stats_db=stats_db)
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
def get_html(url):
    """Fetch *url* and return the response body, or None on a non-200 status."""
    response = requests.get(url)
    return response.text if response.status_code == 200 else None
|
| 21 |
-
|
| 22 |
-
def create_html_file(url, content):
    """Save *content* to a temp .html file named after *url* and wrap it as an UploadedFile.

    Returns a (uploaded_file, temp_file_path) pair; the caller is responsible
    for removing the temp file (see `delete_tempfile`).
    """
    file_name = slugify(url) + ".html"
    temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
    with open(temp_file_path, 'w') as temp_file:
        temp_file.write(content)

    # Fix: read the file back inside a context manager — the original used
    # open(...).read() inline and leaked the file handle.
    with open(temp_file_path, 'rb') as saved:
        data = saved.read()
    record = UploadedFileRec(id=None, name=file_name, type='text/html', data=data)
    uploaded_file = UploadedFile(record)

    return uploaded_file, temp_file_path
|
| 32 |
-
|
| 33 |
-
def delete_tempfile(temp_file_path, url, ret):
    """Remove the temporary HTML file; when *ret* is truthy, report the outcome in the UI."""
    try:
        os.remove(temp_file_path)
        if ret:
            st.write(f"✅ Content saved... {url} ")
    except OSError as e:
        print(f"Error while deleting the temporary file: {str(e)}")
        if ret:
            st.write(f"❌ Error while saving content... {url} ")
|
| 42 |
-
|
| 43 |
-
def slugify(text):
    """Turn *text* into a lowercase ASCII slug (words joined by single hyphens)."""
    # Transliterate to ASCII, dropping any characters that don't decompose.
    ascii_text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
    # Keep only word characters, whitespace, and hyphens; then lowercase.
    cleaned = re.sub(r'[^\w\s-]', '', ascii_text).strip().lower()
    # Collapse runs of whitespace/hyphens into a single hyphen.
    return re.sub(r'[-\s]+', '-', cleaned)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/markdown.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import UnstructuredMarkdownLoader
|
| 3 |
-
|
| 4 |
-
def process_markdown(vector_store, file, stats_db):
    """Chunk and embed an uploaded Markdown file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, UnstructuredMarkdownLoader, file_suffix=".md", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/pdf.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import PyPDFLoader
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def process_pdf(vector_store, file, stats_db):
    """Chunk and embed an uploaded PDF file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, PyPDFLoader, file_suffix=".pdf", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/powerpoint.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import UnstructuredPowerPointLoader
|
| 3 |
-
|
| 4 |
-
def process_powerpoint(vector_store, file, stats_db):
    """Chunk and embed an uploaded PowerPoint file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, UnstructuredPowerPointLoader, file_suffix=".pptx", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/loaders/txt.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
from .common import process_file
|
| 2 |
-
from langchain.document_loaders import TextLoader
|
| 3 |
-
|
| 4 |
-
def process_txt(vector_store, file,stats_db):
    """Chunk and embed an uploaded plain-text file via the shared `process_file` pipeline."""
    return process_file(vector_store, file, TextLoader, file_suffix=".txt", stats_db=stats_db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/question.py
DELETED
|
@@ -1,81 +0,0 @@
|
|
| 1 |
-
import anthropic
|
| 2 |
-
import streamlit as st
|
| 3 |
-
from streamlit.logger import get_logger
|
| 4 |
-
from langchain.chains import ConversationalRetrievalChain
|
| 5 |
-
from langchain.memory import ConversationBufferMemory
|
| 6 |
-
from langchain.llms import OpenAI
|
| 7 |
-
from langchain.chat_models import ChatAnthropic
|
| 8 |
-
from langchain.vectorstores import SupabaseVectorStore
|
| 9 |
-
from stats import add_usage
|
| 10 |
-
|
| 11 |
-
memory = ConversationBufferMemory(
|
| 12 |
-
memory_key="chat_history", return_messages=True)
|
| 13 |
-
openai_api_key = st.secrets.openai_api_key
|
| 14 |
-
anthropic_api_key = st.secrets.anthropic_api_key
|
| 15 |
-
logger = get_logger(__name__)
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def count_tokens(question, model):
    """Return a display string with the word count of *question*.

    For Claude models the Anthropic token count is appended as well.
    """
    parts = [f'Words: {len(question.split())}']
    if model.startswith("claude"):
        parts.append(f'Tokens: {anthropic.count_tokens(question)}')
    return ' | '.join(parts)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def chat_with_doc(model, vector_store: SupabaseVectorStore, stats_db):
    """Render the chat UI and answer the user's question against *vector_store*.

    Depending on *model*, the answer is produced by OpenAI or Anthropic via a
    ConversationalRetrievalChain; each prompt is logged to *stats_db* unless
    the user has exhausted their free credits.
    """
    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []

    question = st.text_area("## Ask a question")
    columns = st.columns(3)
    with columns[0]:
        button = st.button("Ask")
    with columns[1]:
        count_button = st.button("Count Tokens", type='secondary')
    with columns[2]:
        clear_history = st.button("Clear History", type='secondary')

    if clear_history:
        # Clear both the Langchain memory and the displayed history.
        memory.clear()
        st.session_state['chat_history'] = []
        st.experimental_rerun()

    if button:
        qa = None
        if not st.session_state["overused"]:
            add_usage(stats_db, "chat", "prompt" + question, {"model": model, "temperature": st.session_state['temperature']})
            if model.startswith("gpt"):
                logger.info('Using OpenAI model %s', model)
                qa = ConversationalRetrievalChain.from_llm(
                    OpenAI(
                        model_name=st.session_state['model'], openai_api_key=openai_api_key, temperature=st.session_state['temperature'], max_tokens=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True)
            elif anthropic_api_key and model.startswith("claude"):
                logger.info('Using Anthropics model %s', model)
                qa = ConversationalRetrievalChain.from_llm(
                    ChatAnthropic(
                        model=st.session_state['model'], anthropic_api_key=anthropic_api_key, temperature=st.session_state['temperature'], max_tokens_to_sample=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True, max_tokens_limit=102400)

            if qa is None:
                # Fix: the original fell through and crashed with a TypeError
                # ("'NoneType' object is not callable") when the model was
                # neither gpt-* nor a usable claude-* configuration.
                st.error(f"Model {model} is not supported or its API key is missing.")
                return

            st.session_state['chat_history'].append(("You", question))

            # Generate the model's response and add it to the chat history.
            model_response = qa({"question": question})
            logger.info('Result: %s', model_response)

            st.session_state['chat_history'].append(("KPMG GPT", model_response["answer"]))

            # Re-render the full conversation.
            st.empty()
            for speaker, text in st.session_state['chat_history']:
                st.markdown(f"**{speaker}:** {text}")
        else:
            st.error("You have used all your free credits. Please try again later or self host.")

    if count_button:
        st.write(count_tokens(question, model))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/requirements.txt
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
langchain==0.0.166
|
| 2 |
-
Markdown==3.4.3
|
| 3 |
-
openai==0.27.6
|
| 4 |
-
pdf2image==1.16.3
|
| 5 |
-
pypdf==3.8.1
|
| 6 |
-
streamlit==1.22.0
|
| 7 |
-
StrEnum==0.4.10
|
| 8 |
-
supabase==1.0.3
|
| 9 |
-
tiktoken==0.4.0
|
| 10 |
-
unstructured==0.6.5
|
| 11 |
-
anthropic==0.2.8
|
| 12 |
-
fastapi==0.95.2
|
| 13 |
-
python-multipart==0.0.6
|
| 14 |
-
uvicorn==0.22.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/sidebar.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def sidebar(supabase):
    """Render the sidebar panel showing how many documents are stored."""
    st.sidebar.title("Database Information")
    number_of_docs = number_of_documents(supabase)
    st.sidebar.markdown(f"**Docs in DB:** {number_of_docs}")

def number_of_documents(supabase):
    """Return the exact row count of the "documents" table."""
    response = supabase.table("documents").select("id", count="exact").execute()
    return response.count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/stats.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
from datetime import datetime, timedelta
|
| 2 |
-
|
| 3 |
-
# -- Create a table called "stats"
|
| 4 |
-
# create table
|
| 5 |
-
# stats (
|
| 6 |
-
# -- A column called "time" with data type "timestamp"
|
| 7 |
-
# time timestamp,
|
| 8 |
-
# -- A column called "details" with data type "text"
|
| 9 |
-
# chat boolean,
|
| 10 |
-
# embedding boolean,
|
| 11 |
-
# details text,
|
| 12 |
-
# metadata jsonb,
|
| 13 |
-
# -- An "integer" primary key column called "id" that is generated always as identity
|
| 14 |
-
# id integer primary key generated always as identity
|
| 15 |
-
# );
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def get_usage_today(supabase):
    """Return how many rows the "stats" table accumulated in the last 24 hours."""
    cutoff = datetime.now() - timedelta(hours=24)
    response = supabase.table("stats").select("id", count="exact").gte("time", cutoff).execute()
    return response.count
|
| 22 |
-
|
| 23 |
-
def add_usage(supabase, type, details, metadata):
    """Insert one usage event into the "stats" table.

    *type* flags the row as a "chat" or "embedding" event; *details* and
    *metadata* are stored verbatim alongside the current timestamp.
    """
    row = {
        "time": datetime.now().isoformat(),
        "chat": type == "chat",
        "embedding": type == "embedding",
        "details": details,
        "metadata": metadata,
    }
    supabase.table("stats").insert(row).execute()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit-demo/utils.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
import hashlib


def compute_sha1_from_file(file_path):
    """Return the SHA-1 hex digest of the file at *file_path*."""
    with open(file_path, "rb") as fh:
        payload = fh.read()
    return compute_sha1_from_content(payload)


def compute_sha1_from_content(content):
    """Return the SHA-1 hex digest of *content* (a bytes-like object)."""
    return hashlib.sha1(content).hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|