Spaces:

BitBasher
/

EduConnect

Build error

App Files Files Community

dtyago committed on Feb 20, 2024

Commit

acdfb5c

1 Parent(s): 97db660

Cleanup project structure

Browse files

Files changed (15) hide show

Dockerfile +22 -21
README.md +2 -2
app/admin/admin_functions.py +8 -9
app/api/userchat.py +3 -3
app/api/userlogin.py +3 -3
app/api/userlogout.py +2 -2
app/api/userupload.py +3 -3
app/dependencies.py +2 -2
app/main.py +9 -9
app/utils/__init__.py +1 -2
app/utils/chat_rag.py +21 -4
app/utils/db.py +2 -2
docker-compose.yml +0 -17
entrypoint.sh +7 -4
requirements.txt +2 -1

Dockerfile CHANGED Viewed

@@ -10,18 +10,14 @@ RUN apt-get update && apt-get install -y \
 RUN useradd -m -u 1000 user
 # Set environment variables for the non-root user
-# Name -> Name of the app container
-# EC_ADMIN_PWD -> A secret
-# HF_MODEL_NAME -> Name of the Hugging Face Hub model
-# GGUF_MODEL_URL -> For special loading for GGUF
-# MODEL_CLASS -> A switch to load 'gguf' or 'hf'
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
     NAME=EduConnect \
     EC_ADMIN_PWD='$2b$12$wGncNhE7OVmsb7TKFuNPKuJfKOIKdGtw302VMDJbAPrHrY73jqID.' \
     HF_MODEL_NAME="BitBasher/llama-2-7b-mini-ibased-GGUF" \
     GGUF_MODEL_URL='https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf' \
-    MODEL_CLASS='gguf'
 # Set the non-root user's home directory as the working directory
 WORKDIR $HOME
@@ -32,34 +28,39 @@ RUN mkdir -p ./data && chown user:user ./data
 # Change to the non-root user
 USER user
-# Set the working directory to where the application files will be located
-WORKDIR $HOME/app
 # Copy only the requirements.txt first to leverage Docker cache
-COPY --chown=user:user requirements.txt ./
 # Install any needed packages specified in requirements.txt
-# As the non-root user, ensure packages are installed to the user's home directory
-RUN pip install --no-cache-dir --user -r requirements.txt
 # Copy the rest of the application files into the container
-COPY --chown=user:user . .
 # Make port 7860 available to the world outside this container
 EXPOSE 7860
 # Indicate that a volume is expected at /home/user/data
-# This directory is intended for persistent storage
 VOLUME /home/user/data
-# Copy the entrypoint script into the container and ensure it is executable
-COPY --chown=user:user entrypoint.sh $HOME
 # Change permission of entrypoint.sh and make sure it is executable
-RUN chmod +x $HOME/entrypoint.sh
-# Set the entrypoint script to be executed when the container starts
-ENTRYPOINT ["./entrypoint.sh"]
-# Run the FastAPI application using Uvicorn, binding to port 7860
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

 RUN useradd -m -u 1000 user
 # Set environment variables for the non-root user
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH \
     NAME=EduConnect \
     EC_ADMIN_PWD='$2b$12$wGncNhE7OVmsb7TKFuNPKuJfKOIKdGtw302VMDJbAPrHrY73jqID.' \
     HF_MODEL_NAME="BitBasher/llama-2-7b-mini-ibased-GGUF" \
     GGUF_MODEL_URL='https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf' \
+    MODEL_CLASS='gguf' \
+    CHROMADB_LOC='/home/user/data/chromadb'
 # Set the non-root user's home directory as the working directory
 WORKDIR $HOME
 # Change to the non-root user
 USER user
 # Copy only the requirements.txt first to leverage Docker cache
+COPY --chown=user:user requirements.txt $HOME/app/
+# Copy Static files for Jinja2 templates
+COPY --chown=user:user ./static /home/user/app/static
 # Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir --user -r $HOME/app/requirements.txt
+# Set the working directory to where the application files will be located
+WORKDIR $HOME/app
 # Copy the rest of the application files into the container
+COPY --chown=user:user ./app .
 # Make port 7860 available to the world outside this container
 EXPOSE 7860
 # Indicate that a volume is expected at /home/user/data
 VOLUME /home/user/data
+# Adjust the COPY command for the entrypoint script to ensure correct placement
+COPY --chown=user:user entrypoint.sh /home/user/entrypoint.sh
 # Change permission of entrypoint.sh and make sure it is executable
+RUN chmod +x /home/user/entrypoint.sh
+# Update the ENTRYPOINT command to use the full path to entrypoint.sh
+ENTRYPOINT ["/home/user/entrypoint.sh"]
+# Set Python path just to make sure
+ENV PYTHONPATH="/home/user/app:${PYTHONPATH}"
+# Adjust the CMD to ensure it correctly references the FastAPI app
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -11,7 +11,7 @@ license: apache-2.0
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 ```
-EduConnect/
 ├── app/
 │   ├── __init__.py                  # Initializes the FastAPI app and global configurations
 │   ├── main.py                      # Entry point for the FastAPI application, defining routes
@@ -43,5 +43,5 @@ EduConnect/
 ├── Dockerfile                       # Docker configuration for setting up the environment
 ├── requirements.txt                 # Lists all Python library dependencies
 ├── entrypoint.sh                    # To download model from hugging face
-└── .env                             # Environment variables for configuration settings
 ```

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 ```
+**EduConnect/
 ├── app/
 │   ├── __init__.py                  # Initializes the FastAPI app and global configurations
 │   ├── main.py                      # Entry point for the FastAPI application, defining routes
 ├── Dockerfile                       # Docker configuration for setting up the environment
 ├── requirements.txt                 # Lists all Python library dependencies
 ├── entrypoint.sh                    # To download model from hugging face
+└── .env                             # Environment variables for configuration settings**
 ```

app/admin/admin_functions.py CHANGED Viewed

@@ -1,19 +1,18 @@
 from fastapi import HTTPException, UploadFile, File, Form
 from typing import Optional
 import bcrypt
 import os
 import shutil
 # Import vector store for database operations
-from langchain.vectorstores import Chroma
 # Import embeddings module from langchain for vector representations of text
-from langchain.embeddings import HuggingFaceEmbeddings
-from app.main import CHROMADB_LOC
-from app.utils.chat_rag import sanitize_collection_name
-from ..utils import get_user_cropped_image_from_photo
 # Registering a face
 async def register_user(db, email: str, name: str, role: str, file: UploadFile = File(...)):
@@ -104,7 +103,7 @@ def faces_count(client, db):
 def remove_all_faces(client, user_faces_collection="user_faces_db"):
     # Fetch all user IDs from the user_faces_db collection
     all_user_ids = client.get_all_ids(collection_name=user_faces_collection)
     # Loop through all user IDs and delete associated collections
     for user_id in all_user_ids:
         sanitized_collection_name = sanitize_collection_name(user_id)

+import hashlib
+import re
 from fastapi import HTTPException, UploadFile, File, Form
 from typing import Optional
 import bcrypt
 import os
 import shutil
+from utils.chat_rag import sanitize_collection_name
+from utils.ec_image_utils import get_user_cropped_image_from_photo
 # Import vector store for database operations
+from langchain_community.vectorstores import Chroma
 # Import embeddings module from langchain for vector representations of text
+from langchain_community.embeddings import HuggingFaceEmbeddings
 # Registering a face
 async def register_user(db, email: str, name: str, role: str, file: UploadFile = File(...)):
 def remove_all_faces(client, user_faces_collection="user_faces_db"):
     # Fetch all user IDs from the user_faces_db collection
     all_user_ids = client.get_all_ids(collection_name=user_faces_collection)
+    CHROMADB_LOC = os.getenv('CHROMADB_LOC')
     # Loop through all user IDs and delete associated collections
     for user_id in all_user_ids:
         sanitized_collection_name = sanitize_collection_name(user_id)

app/api/userchat.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import APIRouter, Depends, HTTPException, Body
-from ..dependencies import get_current_user
-from ..utils.chat_rag import llm_infer
-from ..utils.chat_rag import sanitize_collection_name
 from typing import Any
 router = APIRouter()

 from fastapi import APIRouter, Depends, HTTPException, Body
+from dependencies import get_current_user
+from utils.chat_rag import llm_infer
+from utils.chat_rag import sanitize_collection_name
 from typing import Any
 router = APIRouter()

app/api/userlogin.py CHANGED Viewed

@@ -1,9 +1,9 @@
 from datetime import datetime, timedelta
 from typing import Optional
 from fastapi import APIRouter, File, UploadFile, HTTPException
-from ..utils.db import tinydb_helper, chromadb_face_helper
-from ..utils.jwt_utils import create_access_token
-from ..utils.ec_image_utils import get_user_cropped_image_from_photo
 import os
 import uuid

 from datetime import datetime, timedelta
 from typing import Optional
 from fastapi import APIRouter, File, UploadFile, HTTPException
+from utils.db import tinydb_helper, chromadb_face_helper
+from utils.jwt_utils import create_access_token
+from utils.ec_image_utils import get_user_cropped_image_from_photo
 import os
 import uuid

app/api/userlogout.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from typing import Any
 from fastapi import APIRouter, Depends, HTTPException
-from ..utils.db import tinydb_helper  # Ensure this import is correct based on our project structure
-from ..dependencies import get_current_user, oauth2_scheme
 router = APIRouter()

 from typing import Any
 from fastapi import APIRouter, Depends, HTTPException
+from utils.db import tinydb_helper  # Ensure this import is correct based on our project structure
+from dependencies import get_current_user, oauth2_scheme
 router = APIRouter()

app/api/userupload.py CHANGED Viewed

@@ -1,10 +1,10 @@
 from typing import Any
 from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
 import os
-from app.dependencies import get_current_user
 # Assuming a utility for processing PDFs and generating embeddings
-from ..utils.doc_ingest import ingest_document
-from ..utils.chat_rag import sanitize_collection_name
 router = APIRouter()

 from typing import Any
 from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
 import os
+from dependencies import get_current_user
 # Assuming a utility for processing PDFs and generating embeddings
+from utils.doc_ingest import ingest_document
+from utils.chat_rag import sanitize_collection_name
 router = APIRouter()

app/dependencies.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import jwt, JWTError  # Ensure this is correctly imported
-from .utils.db import tinydb_helper  # Ensure this instance is correctly initialized elsewhere
-from .utils.jwt_utils import SECRET_KEY, ALGORITHM, decode_jwt
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

 from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import jwt, JWTError  # Ensure this is correctly imported
+from utils.db import tinydb_helper  # Ensure this instance is correctly initialized elsewhere
+from utils.jwt_utils import SECRET_KEY, ALGORITHM, decode_jwt
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

app/main.py CHANGED Viewed

@@ -7,15 +7,15 @@ from fastapi.staticfiles import StaticFiles
 from fastapi.responses import HTMLResponse, RedirectResponse
 from fastapi.templating import Jinja2Templates
-from .admin import admin_functions as admin
-from .utils.db import UserFaceEmbeddingFunction,ChromaDBFaceHelper
-from .api import userlogin, userlogout, userchat, userupload
-from .utils.db import ChromaDBFaceHelper
-CHROMADB_LOC = "/home/user/data/chromadb"
 app = FastAPI()
 # Add middleware
 # Set all origins to wildcard for simplicity, but we should limit this in production
 app.add_middleware(
@@ -43,10 +43,10 @@ async def startup_event():
     print(f"MODEL_PATH in main.py = {os.getenv('MODEL_PATH')} ")
 # Mount static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
 # Setup Jinja2Templates to point to the templates directory
-templates = Jinja2Templates(directory="app/admin/templates")
 @app.get("/")
 async def get_admin_login(request: Request):
@@ -90,7 +90,7 @@ async def handle_user_registration(request: Request, email: str = Form(...), nam
 @app.get("/admin/data_management", response_class=HTMLResponse)
 async def get_db_details(request: Request):
     # Render the Chroma DB details
-    faces = admin.faces_count(user_faces_db)
     return templates.TemplateResponse("data_management.html", {
         "request": request,
         "faces" : faces

 from fastapi.responses import HTMLResponse, RedirectResponse
 from fastapi.templating import Jinja2Templates
+from admin import admin_functions as admin
+from utils.db import UserFaceEmbeddingFunction,ChromaDBFaceHelper
+from api import userlogin, userlogout, userchat, userupload
+from utils.db import ChromaDBFaceHelper
 app = FastAPI()
+CHROMADB_LOC = os.getenv('CHROMADB_LOC')
 # Add middleware
 # Set all origins to wildcard for simplicity, but we should limit this in production
 app.add_middleware(
     print(f"MODEL_PATH in main.py = {os.getenv('MODEL_PATH')} ")
 # Mount static files
+app.mount("/home/user/app/static", StaticFiles(directory="static"), name="static")
 # Setup Jinja2Templates to point to the templates directory
+templates = Jinja2Templates(directory="admin/templates")
 @app.get("/")
 async def get_admin_login(request: Request):
 @app.get("/admin/data_management", response_class=HTMLResponse)
 async def get_db_details(request: Request):
     # Render the Chroma DB details
+    faces = admin.faces_count(ec_client, user_faces_db)
     return templates.TemplateResponse("data_management.html", {
         "request": request,
         "faces" : faces

app/utils/__init__.py CHANGED Viewed

	@@ -1,2 +1 @@
1	- # In utils/__init__.py
2	- from .ec_image_utils import get_user_cropped_image_from_photo


1	+ # In utils/__init__.py

app/utils/chat_rag.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import os
 import re
 import hashlib
 from langchain.document_loaders import PyPDFLoader
 # Import embeddings module from langchain for vector representations of text
@@ -23,13 +24,13 @@ from langchain.chains.router import MultiPromptChain
 from langchain.chains import ConversationChain
 from langchain.chains.llm import LLMChain
 from langchain.prompts import PromptTemplate
-from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-CHROMADB_LOC = "/home/user/data/chromadb"
 def sanitize_collection_name(email):
     # Replace invalid characters with an underscore
@@ -47,7 +48,9 @@ def sanitize_collection_name(email):
 # Modify vectordb initialization to be dynamic based on user_id
 def get_vectordb_for_user(user_collection_name):
     vectordb = Chroma(
         collection_name=user_collection_name,
         embedding_function=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
@@ -57,6 +60,10 @@ def get_vectordb_for_user(user_collection_name):
 def pdf_to_vec(filename, user_collection_name):
     document = []
     loader = PyPDFLoader(filename)
     document.extend(loader.load()) #which library is this from?
@@ -111,6 +118,9 @@ def load_llm():
 #step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
 def default_chain(llm, user_collection_name):
     vectordb = get_vectordb_for_user(user_collection_name)  # Use the dynamic vectordb based on user_id
     sum_template = """
     As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
@@ -162,9 +172,16 @@ def default_chain(llm, user_collection_name):
     for p_info in prompt_infos:
         name = p_info["name"]
         prompt_template = p_info["prompt_template"]
         prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
-        chain = LLMChain(llm=llm, prompt=prompt)
         destination_chains[name] = chain
     #default_chain = ConversationChain(llm=llm, output_key="text")
     #memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

 import os
 import re
 import hashlib
 from langchain.document_loaders import PyPDFLoader
 # Import embeddings module from langchain for vector representations of text
 from langchain.chains import ConversationChain
 from langchain.chains.llm import LLMChain
 from langchain.prompts import PromptTemplate
+from langchain.memory import ConversationBufferMemory, VectorStoreRetrieverMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 def sanitize_collection_name(email):
     # Replace invalid characters with an underscore
 # Modify vectordb initialization to be dynamic based on user_id
 def get_vectordb_for_user(user_collection_name):
+    # Get Chromadb location
+    CHROMADB_LOC = os.getenv('CHROMADB_LOC')
     vectordb = Chroma(
         collection_name=user_collection_name,
         embedding_function=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
 def pdf_to_vec(filename, user_collection_name):
+    # Get Chromadb location
+    CHROMADB_LOC = os.getenv('CHROMADB_LOC')
     document = []
     loader = PyPDFLoader(filename)
     document.extend(loader.load()) #which library is this from?
 #step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
 def default_chain(llm, user_collection_name):
+    # Get Chromadb location
+    CHROMADB_LOC = os.getenv('CHROMADB_LOC')
     vectordb = get_vectordb_for_user(user_collection_name)  # Use the dynamic vectordb based on user_id
     sum_template = """
     As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
     for p_info in prompt_infos:
         name = p_info["name"]
         prompt_template = p_info["prompt_template"]
+        #vectordb=p_info["vector"]
         prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
+        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') #new
+        vectordb= Chroma(persist_directory = CHROMADB_LOC, embedding_function = embeddings) #new
+        retriever = vectordb.as_retriever()#new
+        memory = VectorStoreRetrieverMemory(retriever=retriever) #new
+        chain = LLMChain(llm=llm, prompt=prompt, verbose=True, memory=memory) #new memory=memory
         destination_chains[name] = chain
     #default_chain = ConversationChain(llm=llm, output_key="text")
     #memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

app/utils/db.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from tinydb import TinyDB, Query, where
 from tinydb.storages import MemoryStorage
 import chromadb
@@ -7,7 +8,6 @@ from keras_facenet import FaceNet
 from typing import Any
 from datetime import datetime, timedelta
-CHROMADB_LOC = "/home/user/data/chromadb"
 class TinyDBHelper:
     def __init__(self):
@@ -77,4 +77,4 @@ class ChromaDBFaceHelper:
 # Initialize these helpers globally if they are to be used across multiple modules
 tinydb_helper = TinyDBHelper()
-chromadb_face_helper = ChromaDBFaceHelper(CHROMADB_LOC)  # Initialization requires db_path

+import os
 from tinydb import TinyDB, Query, where
 from tinydb.storages import MemoryStorage
 import chromadb
 from typing import Any
 from datetime import datetime, timedelta
 class TinyDBHelper:
     def __init__(self):
 # Initialize these helpers globally if they are to be used across multiple modules
 tinydb_helper = TinyDBHelper()
+chromadb_face_helper = ChromaDBFaceHelper(os.getenv('CHROMADB_LOC'))  # Initialization requires db_path

docker-compose.yml DELETED Viewed

@@ -1,17 +0,0 @@
-version: '3.8'
-services:
-  educonnect:
-    image: python:3.9
-    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
-    volumes:
-      - /Users/tyago/Workspace/EduConnect:/home/user/app
-    ports:
-      - "7860:7860"
-    environment:
-      - HOME=/home/user
-      - PATH=/home/user/.local/bin:$PATH
-      - NAME=EduConnect
-    user: "1000:1000"
-    working_dir: /home/user/app
-    env_file:
-      - .env

entrypoint.sh CHANGED Viewed

@@ -1,18 +1,19 @@
 #!/bin/bash
 # Authenticate with Hugging Face
 export HF_HOME=/home/user/data/hf_cache
-# Assuming HF_TOKEN is already exported to the environment
 echo "Using Hugging Face API token for authentication"
 # Navigate to the directory where download_model.py is located
 echo "Determining model path..."
-cd /home/user/app/app/utils
 # Execute the download_model script
 python download_model.py || { echo "Model download failed"; exit 1; }
-# export MODEL_PATH
 # Read the model path from model_path.txt and export it
 MODEL_PATH=$(cat /home/user/data/models/model_path.txt)
 export MODEL_PATH
@@ -20,6 +21,8 @@ echo "@ Entrypoint - MODEL_PATH exported=${MODEL_PATH}"
 # Navigate back to the app directory
 cd /home/user/app
 # Execute the main command of the container
 exec "$@"

 #!/bin/bash
+echo "Starting entrypoint script..."
 # Authenticate with Hugging Face
 export HF_HOME=/home/user/data/hf_cache
 echo "Using Hugging Face API token for authentication"
 # Navigate to the directory where download_model.py is located
 echo "Determining model path..."
+cd /home/user/app/utils
+echo "Current directory for model download: $(pwd)"
 # Execute the download_model script
 python download_model.py || { echo "Model download failed"; exit 1; }
 # Read the model path from model_path.txt and export it
 MODEL_PATH=$(cat /home/user/data/models/model_path.txt)
 export MODEL_PATH
 # Navigate back to the app directory
 cd /home/user/app
+echo "Returning to app directory: $(pwd)"
 # Execute the main command of the container
+echo "Executing main command..."
 exec "$@"

requirements.txt CHANGED Viewed

@@ -18,4 +18,5 @@ tinydb                         # The in memory database for storing JWT tokens
 langchain                      # LangChain for RAG
 llama-cpp-python               # To load the model
 sentence-transformers          # For text embeddings
-pypdf                          # Handling PDF files

 langchain                      # LangChain for RAG
 llama-cpp-python               # To load the model
 sentence-transformers          # For text embeddings
+pypdf                          # Handling PDF files
+langchain-community            # Latest library to prevent deprecated warnings