Upload 12 files
- Dockerfile +17 -0
- README.md +0 -10
- cura/__init__.py +0 -0
- cura/github_ingestion.py +46 -0
- cura/openai_chat.py +22 -0
- cura/vector_store.py +79 -0
- cura_alpha.ipynb +347 -0
- database/__init__.py +34 -0
- index.py +62 -0
- langgraph_code_assistant.ipynb +0 -0
- requirements.txt +6 -0
- test_index.py +31 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
+# Use an official FastAPI runtime as a parent image
+FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --trusted-host pypi.python.org -r requirements.txt
+
+# Make port 80 available to the world outside this container
+EXPOSE 80
+
+# Run index.py when the container launches (bind to 0.0.0.0 so the exposed port is reachable)
+CMD ["uvicorn", "index:app", "--host", "0.0.0.0", "--port", "80", "--reload"]
README.md
CHANGED
@@ -1,11 +1 @@
----
-title: Mindify Chat Api Demo
-emoji: 🦀
-colorFrom: gray
-colorTo: gray
-sdk: docker
-pinned: false
-license: mit
----
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
cura/__init__.py
ADDED
File without changes
cura/github_ingestion.py
ADDED
@@ -0,0 +1,46 @@
+
+"""
+GitHub Repo File Ingestion and Indexing
+"""
+
+from langchain_community.document_loaders.github import GithubFileLoader
+from tqdm import tqdm
+
+def ingest_github_repo(repo_name: str, access_token: str):
+    """
+    Ingests files from a GitHub repository and returns their contents and paths.
+
+    Args:
+        repo_name: str
+            The name of the GitHub repository in the format "username/repo_name".
+        access_token: str
+            The GitHub access token to access the repository.
+
+    Returns:
+        tuple
+            A (files, file_paths) pair: the text contents of the files and the paths they were loaded from.
+    """
+    loader = GithubFileLoader(
+        repo=repo_name,
+        access_token=access_token,
+    )
+
+    # List the directory contents for the repository
+    file_paths = loader.get_file_paths()
+
+    # Load the files from the repository via the GitHub API
+    files = []
+
+    print("Ingesting files from the repository...")
+    for i in tqdm(range(len(file_paths))):
+        try:
+            file = loader.get_file_content_by_path(file_paths[i]["path"])
+            # If the file is not a text file, skip it
+            if file is None:
+                continue
+            else:
+                files.append(file)
+        except Exception:
+            continue
+
+    return files, file_paths
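
A minimal usage sketch (editorial addition, not part of the commit), assuming a valid GitHub token; note that ingest_github_repo returns a (files, file_paths) tuple:

from cura import github_ingestion

files, file_paths = github_ingestion.ingest_github_repo(
    "MarkCodering/mindify-website",      # "username/repo_name", as used in cura_alpha.ipynb
    access_token="<your-github-token>",  # placeholder; never commit a real token
)
print("Ingested {} of {} files".format(len(files), len(file_paths)))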
cura/openai_chat.py
ADDED
@@ -0,0 +1,22 @@
+from langchain_openai import ChatOpenAI
+import os
+
+def ask_question(message: str):
+    llm = ChatOpenAI(
+        model="gpt-4o",
+        temperature=0,
+        max_tokens=None,
+        timeout=None,
+        max_retries=2,
+        api_key=os.getenv("OPENAI_API_KEY"),
+        # api_key="...",  # if you prefer to pass the API key in directly instead of using env vars
+        # base_url="...",
+        # organization="...",
+        # other params...
+    )
+
+    try:
+        response = llm.invoke(message)  # returns a LangChain AIMessage; callers read .content
+        return response
+    except Exception:
+        print("Error in openai_chat.py")
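
A minimal usage sketch (editorial addition, not part of the commit), assuming OPENAI_API_KEY is set; ask_question returns a message object, or None if the call fails:

from cura import openai_chat

answer = openai_chat.ask_question("In one sentence, what is a vector store?")
if answer is not None:  # ask_question falls through to None on error
    print(answer.content)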
cura/vector_store.py
ADDED
@@ -0,0 +1,79 @@
+
+"""
+Vector Store for Mindify Chat
+"""
+
+import chromadb
+
+def set_up_chromadb(collection_name: str):
+    """
+    Set up a ChromaDB collection for storing vectors.
+
+    Args:
+        collection_name: str
+            The name of the collection to create or retrieve.
+
+    Returns:
+        ChromaDB Collection
+            The ChromaDB collection object.
+    """
+    chroma_client = chromadb.Client()
+
+    try:
+        # Check if the collection already exists
+        collection = chroma_client.get_collection(name=collection_name)
+        return collection
+    except Exception:
+        # Create a new collection
+        collection = chroma_client.create_collection(name=collection_name)
+        return collection
+
+
+def index_vector_store(collection_name: str, files: list):
+    """
+    Index the files in the ChromaDB collection.
+
+    Args:
+        collection_name: str
+            The name of the collection to store the vectors in.
+        files: list
+            The (files, file_paths) tuple from ingest_github_repo; files[0] holds the contents to index.
+
+    Returns:
+        bool
+            True if the data is stored successfully, False otherwise.
+    """
+    # Set up collection
+    try:
+        collection = chromadb.Client().get_collection(name=collection_name)
+    except Exception:
+        collection = chromadb.Client().create_collection(name=collection_name)
+
+    print("Indexing files...")
+    ids = []
+    for i in range(len(files[0])):
+        ids.append(str(i))
+
+    print("Storing GitHub data in ChromaDB...")
+    try:
+        collection.add(ids=ids, documents=files[0])
+        print("Data stored successfully!")
+
+        return True
+    except Exception:
+        print("Error storing data in ChromaDB")
+        return False
+
+def query_vector_store(collection_name: str, query: str):
+    """
+    Query the ChromaDB collection for similar vectors to the query vector.
+    """
+    print("Querying ChromaDB...")
+    try:
+        list_collection = chromadb.Client().list_collections()
+        print(list_collection)
+        collection = chromadb.Client().get_collection(name=collection_name)
+        return collection.query(query_texts=query, n_results=5)
+    except Exception:
+        print("Error querying ChromaDB")
+        return None
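
An end-to-end sketch of the two helpers (editorial addition, not part of the commit). The documents are stand-ins shaped like ingest_github_repo's (files, file_paths) return value, and it assumes the default in-process Chroma client, whose state is shared between calls within one process:

from cura import vector_store

docs = ["sample document {}".format(i) for i in range(5)]            # stand-in file contents
files = (docs, [{"path": "doc_{}.md".format(i)} for i in range(5)])  # mimic ingest_github_repo's shape
if vector_store.index_vector_store(collection_name="demo_collection", files=files):
    results = vector_store.query_vector_store("demo_collection", "sample document 3")
    if results is not None:
        print(results["documents"][0])  # the five nearest documents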
cura_alpha.ipynb
ADDED
@@ -0,0 +1,347 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Ingesting GitHub data, please input the following information:\n",
+      "Ingesting GitHub data...\n",
+      "Ingesting files from the repository...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 75/75 [00:43<00:00, 1.73it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cura import github_ingestion\n",
+    "from cura import vector_store\n",
+    "\n",
+    "print(\"Ingesting GitHub data, please input the following information:\")\n",
+    "url = \"MarkCodering/mindify-website\"\n",
+    "access_token = input(\"GitHub Access Token: \")\n",
+    "\n",
+    "print(\"Ingesting GitHub data...\")\n",
+    "github_repo_data = github_ingestion.ingest_github_repo(url, access_token)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Storing GitHub data in ChromaDB...\n"
+     ]
+    }
+   ],
+   "source": [
+    "collection_name = url.replace(\"/\", \"_\")\n",
+    "collection = vector_store.set_up_chromadb(collection_name)\n",
+    "ids = []\n",
+    "for i in range(len(github_repo_data[0])):\n",
+    "    ids.append(str(i))\n",
+    "    \n",
+    "print(\"Storing GitHub data in ChromaDB...\")\n",
+    "collection.add(ids=ids, documents=github_repo_data[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Querying the data from the vector store...\n",
+      "---\n",
+      "// @ts-ignore\n",
+      "const features = [\n",
+      "  {\n",
+      "    title: \"Learn AI Technologies\",\n",
+      "    description:\n",
+      "      \"We provide online and in-person training to help you learn the latest generative AI technologies.\",\n",
+      "  },\n",
+      "  {\n",
+      "    title: \"Deploy AI Solutions\",\n",
+      "    description:\n",
+      "      \"We provide a platform for developers to deploy generative AI solutions in their projects.\",\n",
+      "  },\n",
+      "  {\n",
+      "    title: \"Fast Prototyping and Concept Validation\",\n",
+      "    description:\n",
+      "      \"We help you quickly prototype and validate your AI concepts to bring them to market faster.\",\n",
+      "  },\n",
+      "];\n",
+      "---\n",
+      "\n",
+      "<div class=\"mt-16 md:mt-0\">\n",
+      "  <h2 class=\"text-4xl lg:text-5xl font-bold lg:tracking-tight text-center\">\n",
+      "    About Mindify AI\n",
+      "  </h2>\n",
+      "  <p class=\"text-lg mt-4 text-slate-600\">\n",
+      "    Mindify is an AI solution company that provides a platform for developers to\n",
+      "    learn and deploy generative AI solutions. We deliver online and in-person\n",
+      "    training to help you learn the latest AI technologies and deploy them in\n",
+      "    your projects. Our mission is to help you bring your AI concepts to market\n",
+      "    faster and deliver value to your customers.\n",
+      "  </p>\n",
+      "</div>\n",
+      "\n",
+      "<div class=\"grid sm:grid-cols-2 md:grid-cols-3 mt-16 gap-16\">\n",
+      "  {\n",
+      "    features.map((item) => (\n",
+      "      <div class=\"flex gap-4 items-start\">\n",
+      "        <div>\n",
+      "          <h3 class=\"font-semibold text-lg\">{item.title}</h3>{\" \"}\n",
+      "          <p class=\"text-slate-500 mt-2 leading-relaxed\">{item.description}</p>\n",
+      "        </div>\n",
+      "      </div>\n",
+      "    ))\n",
+      "  }\n",
+      "</div>\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Query the data from the vector store\n",
+    "print(\"Querying the data from the vector store...\")\n",
+    "prompt = \"What is Mindify AI?\"\n",
+    "results = collection.query(\n",
+    "    query_texts=[prompt], # Chroma will embed this for you\n",
+    "    n_results=2 # how many results to return\n",
+    ")\n",
+    "print(results[\"documents\"][0][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Asking OpenAI the following question: You are a smart and helpful AI programmer and here is the repository I am working on: MarkCodering/mindify-websiteAnd, I wonder if you can help me with the following question with the following question: What is Mindify AI?based on the data in the repository which is available here: ---\n",
+      "// @ts-ignore\n",
+      "const features = [\n",
+      "  {\n",
+      "    title: \"Learn AI Technologies\",\n",
+      "    description:\n",
+      "      \"We provide online and in-person training to help you learn the latest generative AI technologies.\",\n",
+      "  },\n",
+      "  {\n",
+      "    title: \"Deploy AI Solutions\",\n",
+      "    description:\n",
+      "      \"We provide a platform for developers to deploy generative AI solutions in their projects.\",\n",
+      "  },\n",
+      "  {\n",
+      "    title: \"Fast Prototyping and Concept Validation\",\n",
+      "    description:\n",
+      "      \"We help you quickly prototype and validate your AI concepts to bring them to market faster.\",\n",
+      "  },\n",
+      "];\n",
+      "---\n",
+      "\n",
+      "<div class=\"mt-16 md:mt-0\">\n",
+      "  <h2 class=\"text-4xl lg:text-5xl font-bold lg:tracking-tight text-center\">\n",
+      "    About Mindify AI\n",
+      "  </h2>\n",
+      "  <p class=\"text-lg mt-4 text-slate-600\">\n",
+      "    Mindify is an AI solution company that provides a platform for developers to\n",
+      "    learn and deploy generative AI solutions. We deliver online and in-person\n",
+      "    training to help you learn the latest AI technologies and deploy them in\n",
+      "    your projects. Our mission is to help you bring your AI concepts to market\n",
+      "    faster and deliver value to your customers.\n",
+      "  </p>\n",
+      "</div>\n",
+      "\n",
+      "<div class=\"grid sm:grid-cols-2 md:grid-cols-3 mt-16 gap-16\">\n",
+      "  {\n",
+      "    features.map((item) => (\n",
+      "      <div class=\"flex gap-4 items-start\">\n",
+      "        <div>\n",
+      "          <h3 class=\"font-semibold text-lg\">{item.title}</h3>{\" \"}\n",
+      "          <p class=\"text-slate-500 mt-2 leading-relaxed\">{item.description}</p>\n",
+      "        </div>\n",
+      "      </div>\n",
+      "    ))\n",
+      "  }\n",
+      "</div>\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from cura import openai_chat\n",
+    "\n",
+    "question = (\n",
+    "    \"You are a smart and helpful AI programmer and here is the repository I am working on: {}\".format(\n",
+    "        url\n",
+    "    )\n",
+    "    + \"And, I wonder if you can help me with the following question with the following question: {}\".format(\n",
+    "        prompt\n",
+    "    )\n",
+    "    + \"based on the data in the repository which is available here: {}\".format(\n",
+    "        results[\"documents\"][0][0]\n",
+    "    )\n",
+    ")\n",
+    "print(\"Asking OpenAI the following question: {}\".format(question))\n",
+    "\n",
+    "answer = openai_chat.ask_question(question)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Based on the provided data from the repository, Mindify AI is an AI solution company that focuses on providing a platform for developers to learn and deploy generative AI solutions. Here are the key aspects of Mindify AI:\n",
+      "\n",
+      "1. **Learning AI Technologies**: Mindify AI offers both online and in-person training to help individuals and developers learn the latest generative AI technologies.\n",
+      "\n",
+      "2. **Deploying AI Solutions**: The platform allows developers to deploy generative AI solutions in their projects, facilitating the integration of advanced AI capabilities.\n",
+      "\n",
+      "3. **Fast Prototyping and Concept Validation**: Mindify AI assists in quickly prototyping and validating AI concepts, enabling faster time-to-market for AI-driven products and solutions.\n",
+      "\n",
+      "The mission of Mindify AI is to help developers and businesses bring their AI concepts to market more quickly and deliver value to their customers through advanced AI technologies.\n",
+      "\n",
+      "Here is a summary of the features provided by Mindify AI:\n",
+      "- **Learn AI Technologies**: Training programs to learn the latest generative AI technologies.\n",
+      "- **Deploy AI Solutions**: A platform for deploying generative AI solutions in projects.\n",
+      "- **Fast Prototyping and Concept Validation**: Support for rapid prototyping and validation of AI concepts.\n",
+      "\n",
+      "Overall, Mindify AI aims to empower developers and businesses with the knowledge and tools needed to leverage generative AI effectively.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(answer.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/mark/Documents/Mindify/CURA-alpha/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL: http://127.0.0.1:7860\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import gradio as gr\n",
+    "\n",
+    "def echo(question):\n",
+    "    # Query the collection with the provided question\n",
+    "    results = collection.query(\n",
+    "        query_texts=[question], # Chroma will embed this for you\n",
+    "        n_results=1 # Number of results to return\n",
+    "    )\n",
+    "    \n",
+    "    # Append the retrieved document to the question\n",
+    "    question = question + results[\"documents\"][0][0]\n",
+    "    \n",
+    "    # Use OpenAI's chat to ask the modified question\n",
+    "    answer = openai_chat.ask_question(question)\n",
+    "    \n",
+    "    # Return the content of the answer\n",
+    "    return answer.content\n",
+    "\n",
+    "# Define the Gradio interface\n",
+    "iface = gr.Interface(\n",
+    "    fn=echo,\n",
+    "    inputs=gr.Textbox(lines=2, placeholder=\"Enter your question here...\"),\n",
+    "    outputs=gr.Code(label=\"Answer\", language=\"markdown\"),\n",
+    ")\n",
+    "\n",
+    "# Launch the Gradio interface\n",
+    "iface.launch()\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
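
One detail visible in the cell 5 output above: the three .format() fragments are concatenated without separators, so the prompt runs words together ("mindify-websiteAnd", "Mindify AI?based") and repeats "with the following question". A variant that keeps the same content but separates the parts (an editorial sketch, not in the commit):

question = (
    "You are a smart and helpful AI programmer and here is the repository "
    "I am working on: {}. ".format(url)
    + "I wonder if you can help me with the following question: {} ".format(prompt)
    + "Please answer based on the data in the repository, which is available here: {}".format(
        results["documents"][0][0]
    )
)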
database/__init__.py
ADDED
@@ -0,0 +1,34 @@
+import os
+# Use load_dotenv to load the .env file
+from dotenv import load_dotenv
+from supabase import create_client, Client
+
+load_dotenv()
+
+url: str = os.environ.get("SUPABASE_URL")
+key: str = os.environ.get("SUPABASE_KEY")
+supabase: Client = create_client(url, key)
+
+def get_supabase() -> Client:
+    return supabase
+
+def post_github_access_token(token: str, user_email: str) -> None:
+    supabase.table("users_github_access_tokens").insert({"github_access_token": token, "user_email": user_email}).execute()
+
+def get_github_access_token(user_email: str):
+    # Get the last access token
+    table_results = supabase.table("users_github_access_tokens").select("github_access_token").eq("user_email", user_email).execute()
+    # Access the data attribute of the response object
+    data = table_results.data
+
+    # Check if there are results and return the last token
+    if data:
+        return data[-1]['github_access_token']
+    else:
+        return None  # or handle the case where there is no matching token
+
+def post_github_repo(repo_name: str, user_email: str) -> None:
+    supabase.table("users_github_repos_name").insert({"repo_name": repo_name, "user_email": user_email}).execute()
+
+def get_github_repos(user_email: str) -> list:
+    return supabase.table("users_github_repos_name").select("repo_name").eq("user_email", user_email).execute().data
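
A minimal usage sketch (editorial addition, not part of the commit), assuming SUPABASE_URL and SUPABASE_KEY are set in the environment or a .env file and that both tables referenced above already exist:

from database import post_github_access_token, get_github_access_token

post_github_access_token("ghp_example_token", "user@example.com")  # placeholder values
token = get_github_access_token("user@example.com")                # most recent token, or None
print(token)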
index.py
ADDED
@@ -0,0 +1,62 @@
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from database import post_github_access_token, post_github_repo, get_github_access_token
+from cura import github_ingestion, vector_store
+
+app = FastAPI(
+    title="Mindify Chat API",
+    description="API for Mindify Chat",
+    version="0.1"
+)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+
+@app.get("/")
+def read_root():
+    return {"Hello": "World"}
+
+@app.post("/github/access_token")
+def post_github_access_token_route(token: str, user_email: str):
+    post_github_access_token(token, user_email)
+    return {"status": "success"}
+
+@app.post("/github/repo")
+def post_github_repo_route(repo_name: str, user_email: str):
+    post_github_repo(repo_name, user_email)
+    return {"status": "success"}
+
+
+@app.post("/github/index")
+def index_github_repo_route(repo_name: str, user_email: str):
+    access_token = get_github_access_token(user_email)
+    collection_name = repo_name.replace("/", "_")
+    if access_token is not None:
+        files = github_ingestion.ingest_github_repo(repo_name, access_token)
+        results = vector_store.index_vector_store(files=files, collection_name=collection_name)
+        if results:
+            return {"status": "success", "message": "GitHub data stored in ChromaDB"}
+        else:
+            return {"status": "error", "message": "Failed to set up ChromaDB collection"}
+
+    else:
+        return {"status": "error", "message": "Failed to get GitHub access token"}
+
+@app.post("/github/query")
+def query_github_repo_route(repo_name: str, query: str):
+    collection_name = repo_name.replace("/", "_")
+    if collection_name is not None:
+        response = vector_store.query_vector_store(collection_name=collection_name, query=query)
+        return {"status": "success", "response": response}
+    else:
+        return {"status": "error", "message": "Failed to set up ChromaDB collection"}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app)
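
A sketch of how a client would call these endpoints once the service is running (editorial addition, not part of the commit; it uses the requests package, which is not listed in requirements.txt). The route functions declare plain str arguments, so FastAPI reads them from the query string:

import requests

base = "http://localhost:80"  # adjust to wherever the container is exposed
requests.post(base + "/github/access_token",
              params={"token": "<github-token>", "user_email": "user@example.com"})
requests.post(base + "/github/index",
              params={"repo_name": "username/repo_name", "user_email": "user@example.com"})
r = requests.post(base + "/github/query",
                  params={"repo_name": "username/repo_name", "query": "What does this repo do?"})
print(r.json())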
langgraph_code_assistant.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+fastapi
+langchain_community
+langchain_openai
+supabase
+uvicorn
+chromadb
test_index.py
ADDED
@@ -0,0 +1,31 @@
+"""
+Unit tests for the index.py file
+"""
+
+from fastapi.testclient import TestClient
+
+from index import app
+
+client = TestClient(app)
+
+def test_read_root():
+    response = client.get("/")
+    assert response.status_code == 200
+    assert response.json() == {"Hello": "World"}
+
+def test_post_github_access_token_route():
+    response = client.post("/github/access_token", params={"token": "test_token", "user_email": "test_email"})
+    assert response.status_code == 200
+
+def test_post_github_repo_route():
+    response = client.post("/github/repo", params={"repo_name": "test_repo", "user_email": "test_email"})
+    assert response.status_code == 200
+
+def test_index_github_repo_route():
+    response = client.post("/github/index", params={"repo_name": "test_repo", "user_email": "test_email"})
+    assert response.status_code == 200
+
+def test_query_github_repo_route():
+    response = client.post("/github/query", params={"repo_name": "test_repo", "query": "test_query"})
+    assert response.status_code == 200
+
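
The POST tests above exercise the real database module, so they need working Supabase credentials. A sketch (editorial addition, not part of the commit) of isolating one route by patching the function name as imported into index.py:

from unittest.mock import patch

def test_post_github_access_token_route_isolated():
    # Patch "index.post_github_access_token" because index.py imported it with "from database import ..."
    with patch("index.post_github_access_token") as mock_post:
        response = client.post(
            "/github/access_token",
            params={"token": "test_token", "user_email": "test_email"},
        )
    assert response.status_code == 200
    mock_post.assert_called_once_with("test_token", "test_email")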