Spaces:
Runtime error
Runtime error
Cleanup project structure
Browse files- Dockerfile +22 -21
- README.md +2 -2
- app/admin/admin_functions.py +8 -9
- app/api/userchat.py +3 -3
- app/api/userlogin.py +3 -3
- app/api/userlogout.py +2 -2
- app/api/userupload.py +3 -3
- app/dependencies.py +2 -2
- app/main.py +9 -9
- app/utils/__init__.py +1 -2
- app/utils/chat_rag.py +21 -4
- app/utils/db.py +2 -2
- docker-compose.yml +0 -17
- entrypoint.sh +7 -4
- requirements.txt +2 -1
Dockerfile
CHANGED
|
@@ -10,18 +10,14 @@ RUN apt-get update && apt-get install -y \
|
|
| 10 |
RUN useradd -m -u 1000 user
|
| 11 |
|
| 12 |
# Set environment variables for the non-root user
|
| 13 |
-
# Name -> Name of the app container
|
| 14 |
-
# EC_ADMIN_PWD -> A secret
|
| 15 |
-
# HF_MODEL_NAME -> Name of the Hugging Face Hub model
|
| 16 |
-
# GGUF_MODEL_URL -> For special loading for GGUF
|
| 17 |
-
# MODEL_CLASS -> A switch to load 'gguf' or 'hf'
|
| 18 |
ENV HOME=/home/user \
|
| 19 |
PATH=/home/user/.local/bin:$PATH \
|
| 20 |
NAME=EduConnect \
|
| 21 |
EC_ADMIN_PWD='$2b$12$wGncNhE7OVmsb7TKFuNPKuJfKOIKdGtw302VMDJbAPrHrY73jqID.' \
|
| 22 |
HF_MODEL_NAME="BitBasher/llama-2-7b-mini-ibased-GGUF" \
|
| 23 |
GGUF_MODEL_URL='https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf' \
|
| 24 |
-
MODEL_CLASS='gguf'
|
|
|
|
| 25 |
|
| 26 |
# Set the non-root user's home directory as the working directory
|
| 27 |
WORKDIR $HOME
|
|
@@ -32,34 +28,39 @@ RUN mkdir -p ./data && chown user:user ./data
|
|
| 32 |
# Change to the non-root user
|
| 33 |
USER user
|
| 34 |
|
| 35 |
-
# Set the working directory to where the application files will be located
|
| 36 |
-
WORKDIR $HOME/app
|
| 37 |
-
|
| 38 |
# Copy only the requirements.txt first to leverage Docker cache
|
| 39 |
-
COPY --chown=user:user requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Install any needed packages specified in requirements.txt
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Copy the rest of the application files into the container
|
| 46 |
-
COPY --chown=user:user
|
| 47 |
|
| 48 |
# Make port 7860 available to the world outside this container
|
| 49 |
EXPOSE 7860
|
| 50 |
|
| 51 |
# Indicate that a volume is expected at /home/user/data
|
| 52 |
-
# This directory is intended for persistent storage
|
| 53 |
VOLUME /home/user/data
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
COPY --chown=user:user entrypoint.sh
|
| 57 |
|
| 58 |
# Change permission of entrypoint.sh and make sure it is executable
|
| 59 |
-
RUN chmod +x
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
# Set
|
| 62 |
-
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
CMD ["uvicorn", "
|
|
|
|
| 10 |
RUN useradd -m -u 1000 user
|
| 11 |
|
| 12 |
# Set environment variables for the non-root user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
ENV HOME=/home/user \
|
| 14 |
PATH=/home/user/.local/bin:$PATH \
|
| 15 |
NAME=EduConnect \
|
| 16 |
EC_ADMIN_PWD='$2b$12$wGncNhE7OVmsb7TKFuNPKuJfKOIKdGtw302VMDJbAPrHrY73jqID.' \
|
| 17 |
HF_MODEL_NAME="BitBasher/llama-2-7b-mini-ibased-GGUF" \
|
| 18 |
GGUF_MODEL_URL='https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf' \
|
| 19 |
+
MODEL_CLASS='gguf' \
|
| 20 |
+
CHROMADB_LOC='/home/user/data/chromadb'
|
| 21 |
|
| 22 |
# Set the non-root user's home directory as the working directory
|
| 23 |
WORKDIR $HOME
|
|
|
|
| 28 |
# Change to the non-root user
|
| 29 |
USER user
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
# Copy only the requirements.txt first to leverage Docker cache
|
| 32 |
+
COPY --chown=user:user requirements.txt $HOME/app/
|
| 33 |
+
|
| 34 |
+
# Copy Static files for Jinja2 templates
|
| 35 |
+
COPY --chown=user:user ./static /home/user/app/static
|
| 36 |
+
|
| 37 |
|
| 38 |
# Install any needed packages specified in requirements.txt
|
| 39 |
+
RUN pip install --no-cache-dir --user -r $HOME/app/requirements.txt
|
| 40 |
+
|
| 41 |
+
# Set the working directory to where the application files will be located
|
| 42 |
+
WORKDIR $HOME/app
|
| 43 |
|
| 44 |
# Copy the rest of the application files into the container
|
| 45 |
+
COPY --chown=user:user ./app .
|
| 46 |
|
| 47 |
# Make port 7860 available to the world outside this container
|
| 48 |
EXPOSE 7860
|
| 49 |
|
| 50 |
# Indicate that a volume is expected at /home/user/data
|
|
|
|
| 51 |
VOLUME /home/user/data
|
| 52 |
|
| 53 |
+
# Adjust the COPY command for the entrypoint script to ensure correct placement
|
| 54 |
+
COPY --chown=user:user entrypoint.sh /home/user/entrypoint.sh
|
| 55 |
|
| 56 |
# Change permission of entrypoint.sh and make sure it is executable
|
| 57 |
+
RUN chmod +x /home/user/entrypoint.sh
|
| 58 |
+
|
| 59 |
+
# Update the ENTRYPOINT command to use the full path to entrypoint.sh
|
| 60 |
+
ENTRYPOINT ["/home/user/entrypoint.sh"]
|
| 61 |
|
| 62 |
+
# Set Python path just to make sure
|
| 63 |
+
ENV PYTHONPATH="/home/user/app:${PYTHONPATH}"
|
| 64 |
|
| 65 |
+
# Adjust the CMD to ensure it correctly references the FastAPI app
|
| 66 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -11,7 +11,7 @@ license: apache-2.0
|
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
|
| 13 |
```
|
| 14 |
-
EduConnect/
|
| 15 |
├── app/
|
| 16 |
│ ├── __init__.py # Initializes the FastAPI app and global configurations
|
| 17 |
│ ├── main.py # Entry point for the FastAPI application, defining routes
|
|
@@ -43,5 +43,5 @@ EduConnect/
|
|
| 43 |
├── Dockerfile # Docker configuration for setting up the environment
|
| 44 |
├── requirements.txt # Lists all Python library dependencies
|
| 45 |
├── entrypoint.sh # To download model from hugging face
|
| 46 |
-
└── .env # Environment variables for configuration settings
|
| 47 |
```
|
|
|
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
|
| 13 |
```
|
| 14 |
+
EduConnect/
|
| 15 |
├── app/
|
| 16 |
│ ├── __init__.py # Initializes the FastAPI app and global configurations
|
| 17 |
│ ├── main.py # Entry point for the FastAPI application, defining routes
|
|
|
|
| 43 |
├── Dockerfile # Docker configuration for setting up the environment
|
| 44 |
├── requirements.txt # Lists all Python library dependencies
|
| 45 |
├── entrypoint.sh # To download model from hugging face
|
| 46 |
+
└── .env # Environment variables for configuration settings
|
| 47 |
```
|
app/admin/admin_functions.py
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
|
|
|
|
|
| 1 |
from fastapi import HTTPException, UploadFile, File, Form
|
| 2 |
from typing import Optional
|
| 3 |
import bcrypt
|
| 4 |
import os
|
| 5 |
import shutil
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
# Import vector store for database operations
|
| 8 |
-
from
|
| 9 |
# Import embeddings module from langchain for vector representations of text
|
| 10 |
-
from
|
| 11 |
-
from app.main import CHROMADB_LOC
|
| 12 |
-
|
| 13 |
-
from app.utils.chat_rag import sanitize_collection_name
|
| 14 |
-
from ..utils import get_user_cropped_image_from_photo
|
| 15 |
-
|
| 16 |
-
|
| 17 |
|
| 18 |
# Registering a face
|
| 19 |
async def register_user(db, email: str, name: str, role: str, file: UploadFile = File(...)):
|
|
@@ -104,7 +103,7 @@ def faces_count(client, db):
|
|
| 104 |
def remove_all_faces(client, user_faces_collection="user_faces_db"):
|
| 105 |
# Fetch all user IDs from the user_faces_db collection
|
| 106 |
all_user_ids = client.get_all_ids(collection_name=user_faces_collection)
|
| 107 |
-
|
| 108 |
# Loop through all user IDs and delete associated collections
|
| 109 |
for user_id in all_user_ids:
|
| 110 |
sanitized_collection_name = sanitize_collection_name(user_id)
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import re
|
| 3 |
from fastapi import HTTPException, UploadFile, File, Form
|
| 4 |
from typing import Optional
|
| 5 |
import bcrypt
|
| 6 |
import os
|
| 7 |
import shutil
|
| 8 |
|
| 9 |
+
from utils.chat_rag import sanitize_collection_name
|
| 10 |
+
from utils.ec_image_utils import get_user_cropped_image_from_photo
|
| 11 |
+
|
| 12 |
# Import vector store for database operations
|
| 13 |
+
from langchain_community.vectorstores import Chroma
|
| 14 |
# Import embeddings module from langchain for vector representations of text
|
| 15 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Registering a face
|
| 18 |
async def register_user(db, email: str, name: str, role: str, file: UploadFile = File(...)):
|
|
|
|
| 103 |
def remove_all_faces(client, user_faces_collection="user_faces_db"):
|
| 104 |
# Fetch all user IDs from the user_faces_db collection
|
| 105 |
all_user_ids = client.get_all_ids(collection_name=user_faces_collection)
|
| 106 |
+
CHROMADB_LOC = os.getenv('CHROMADB_LOC')
|
| 107 |
# Loop through all user IDs and delete associated collections
|
| 108 |
for user_id in all_user_ids:
|
| 109 |
sanitized_collection_name = sanitize_collection_name(user_id)
|
app/api/userchat.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, Body
|
| 2 |
-
from
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
router = APIRouter()
|
|
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, Body
|
| 2 |
+
from dependencies import get_current_user
|
| 3 |
+
from utils.chat_rag import llm_infer
|
| 4 |
+
from utils.chat_rag import sanitize_collection_name
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
router = APIRouter()
|
app/api/userlogin.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
from datetime import datetime, timedelta
|
| 2 |
from typing import Optional
|
| 3 |
from fastapi import APIRouter, File, UploadFile, HTTPException
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
import os
|
| 8 |
import uuid
|
| 9 |
|
|
|
|
| 1 |
from datetime import datetime, timedelta
|
| 2 |
from typing import Optional
|
| 3 |
from fastapi import APIRouter, File, UploadFile, HTTPException
|
| 4 |
+
from utils.db import tinydb_helper, chromadb_face_helper
|
| 5 |
+
from utils.jwt_utils import create_access_token
|
| 6 |
+
from utils.ec_image_utils import get_user_cropped_image_from_photo
|
| 7 |
import os
|
| 8 |
import uuid
|
| 9 |
|
app/api/userlogout.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from typing import Any
|
| 2 |
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
|
| 6 |
router = APIRouter()
|
| 7 |
|
|
|
|
| 1 |
from typing import Any
|
| 2 |
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
+
from utils.db import tinydb_helper # Ensure this import is correct based on our project structure
|
| 4 |
+
from dependencies import get_current_user, oauth2_scheme
|
| 5 |
|
| 6 |
router = APIRouter()
|
| 7 |
|
app/api/userupload.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
from typing import Any
|
| 2 |
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
|
| 3 |
import os
|
| 4 |
-
from
|
| 5 |
# Assuming a utility for processing PDFs and generating embeddings
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
|
| 9 |
router = APIRouter()
|
| 10 |
|
|
|
|
| 1 |
from typing import Any
|
| 2 |
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
|
| 3 |
import os
|
| 4 |
+
from dependencies import get_current_user
|
| 5 |
# Assuming a utility for processing PDFs and generating embeddings
|
| 6 |
+
from utils.doc_ingest import ingest_document
|
| 7 |
+
from utils.chat_rag import sanitize_collection_name
|
| 8 |
|
| 9 |
router = APIRouter()
|
| 10 |
|
app/dependencies.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
from fastapi import Depends, HTTPException, status
|
| 2 |
from fastapi.security import OAuth2PasswordBearer
|
| 3 |
from jose import jwt, JWTError # Ensure this is correctly imported
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
|
| 7 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
| 8 |
|
|
|
|
| 1 |
from fastapi import Depends, HTTPException, status
|
| 2 |
from fastapi.security import OAuth2PasswordBearer
|
| 3 |
from jose import jwt, JWTError # Ensure this is correctly imported
|
| 4 |
+
from utils.db import tinydb_helper # Ensure this instance is correctly initialized elsewhere
|
| 5 |
+
from utils.jwt_utils import SECRET_KEY, ALGORITHM, decode_jwt
|
| 6 |
|
| 7 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
| 8 |
|
app/main.py
CHANGED
|
@@ -7,15 +7,15 @@ from fastapi.staticfiles import StaticFiles
|
|
| 7 |
from fastapi.responses import HTMLResponse, RedirectResponse
|
| 8 |
from fastapi.templating import Jinja2Templates
|
| 9 |
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
from
|
| 13 |
-
from
|
| 14 |
-
|
| 15 |
-
CHROMADB_LOC = "/home/user/data/chromadb"
|
| 16 |
|
| 17 |
app = FastAPI()
|
| 18 |
|
|
|
|
|
|
|
| 19 |
# Add middleware
|
| 20 |
# Set all origins to wildcard for simplicity, but we should limit this in production
|
| 21 |
app.add_middleware(
|
|
@@ -43,10 +43,10 @@ async def startup_event():
|
|
| 43 |
print(f"MODEL_PATH in main.py = {os.getenv('MODEL_PATH')} ")
|
| 44 |
|
| 45 |
# Mount static files
|
| 46 |
-
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 47 |
|
| 48 |
# Setup Jinja2Templates to point to the templates directory
|
| 49 |
-
templates = Jinja2Templates(directory="
|
| 50 |
|
| 51 |
@app.get("/")
|
| 52 |
async def get_admin_login(request: Request):
|
|
@@ -90,7 +90,7 @@ async def handle_user_registration(request: Request, email: str = Form(...), nam
|
|
| 90 |
@app.get("/admin/data_management", response_class=HTMLResponse)
|
| 91 |
async def get_db_details(request: Request):
|
| 92 |
# Render the Chroma DB details
|
| 93 |
-
faces = admin.faces_count(user_faces_db)
|
| 94 |
return templates.TemplateResponse("data_management.html", {
|
| 95 |
"request": request,
|
| 96 |
"faces" : faces
|
|
|
|
| 7 |
from fastapi.responses import HTMLResponse, RedirectResponse
|
| 8 |
from fastapi.templating import Jinja2Templates
|
| 9 |
|
| 10 |
+
from admin import admin_functions as admin
|
| 11 |
+
from utils.db import UserFaceEmbeddingFunction,ChromaDBFaceHelper
|
| 12 |
+
from api import userlogin, userlogout, userchat, userupload
|
| 13 |
+
from utils.db import ChromaDBFaceHelper
|
|
|
|
|
|
|
| 14 |
|
| 15 |
app = FastAPI()
|
| 16 |
|
| 17 |
+
CHROMADB_LOC = os.getenv('CHROMADB_LOC')
|
| 18 |
+
|
| 19 |
# Add middleware
|
| 20 |
# Set all origins to wildcard for simplicity, but we should limit this in production
|
| 21 |
app.add_middleware(
|
|
|
|
| 43 |
print(f"MODEL_PATH in main.py = {os.getenv('MODEL_PATH')} ")
|
| 44 |
|
| 45 |
# Mount static files
|
| 46 |
+
app.mount("/home/user/app/static", StaticFiles(directory="static"), name="static")
|
| 47 |
|
| 48 |
# Setup Jinja2Templates to point to the templates directory
|
| 49 |
+
templates = Jinja2Templates(directory="admin/templates")
|
| 50 |
|
| 51 |
@app.get("/")
|
| 52 |
async def get_admin_login(request: Request):
|
|
|
|
| 90 |
@app.get("/admin/data_management", response_class=HTMLResponse)
|
| 91 |
async def get_db_details(request: Request):
|
| 92 |
# Render the Chroma DB details
|
| 93 |
+
faces = admin.faces_count(ec_client, user_faces_db)
|
| 94 |
return templates.TemplateResponse("data_management.html", {
|
| 95 |
"request": request,
|
| 96 |
"faces" : faces
|
app/utils/__init__.py
CHANGED
|
@@ -1,2 +1 @@
|
|
| 1 |
-
# In utils/__init__.py
|
| 2 |
-
from .ec_image_utils import get_user_cropped_image_from_photo
|
|
|
|
| 1 |
+
# In utils/__init__.py
|
|
|
app/utils/chat_rag.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import hashlib
|
|
|
|
| 5 |
from langchain.document_loaders import PyPDFLoader
|
| 6 |
|
| 7 |
# Import embeddings module from langchain for vector representations of text
|
|
@@ -23,13 +24,13 @@ from langchain.chains.router import MultiPromptChain
|
|
| 23 |
from langchain.chains import ConversationChain
|
| 24 |
from langchain.chains.llm import LLMChain
|
| 25 |
from langchain.prompts import PromptTemplate
|
| 26 |
-
from langchain.memory import ConversationBufferMemory
|
| 27 |
from langchain.chains import ConversationalRetrievalChain
|
| 28 |
|
| 29 |
from langchain.callbacks.manager import CallbackManager
|
| 30 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 31 |
|
| 32 |
-
|
| 33 |
|
| 34 |
def sanitize_collection_name(email):
|
| 35 |
# Replace invalid characters with an underscore
|
|
@@ -47,7 +48,9 @@ def sanitize_collection_name(email):
|
|
| 47 |
|
| 48 |
# Modify vectordb initialization to be dynamic based on user_id
|
| 49 |
def get_vectordb_for_user(user_collection_name):
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
vectordb = Chroma(
|
| 52 |
collection_name=user_collection_name,
|
| 53 |
embedding_function=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
|
|
@@ -57,6 +60,10 @@ def get_vectordb_for_user(user_collection_name):
|
|
| 57 |
|
| 58 |
|
| 59 |
def pdf_to_vec(filename, user_collection_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
document = []
|
| 61 |
loader = PyPDFLoader(filename)
|
| 62 |
document.extend(loader.load()) #which library is this from?
|
|
@@ -111,6 +118,9 @@ def load_llm():
|
|
| 111 |
|
| 112 |
#step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
|
| 113 |
def default_chain(llm, user_collection_name):
|
|
|
|
|
|
|
|
|
|
| 114 |
vectordb = get_vectordb_for_user(user_collection_name) # Use the dynamic vectordb based on user_id
|
| 115 |
sum_template = """
|
| 116 |
As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
|
|
@@ -162,9 +172,16 @@ def default_chain(llm, user_collection_name):
|
|
| 162 |
for p_info in prompt_infos:
|
| 163 |
name = p_info["name"]
|
| 164 |
prompt_template = p_info["prompt_template"]
|
|
|
|
| 165 |
prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
destination_chains[name] = chain
|
|
|
|
| 168 |
#default_chain = ConversationChain(llm=llm, output_key="text")
|
| 169 |
#memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
|
| 170 |
|
|
|
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import hashlib
|
| 5 |
+
|
| 6 |
from langchain.document_loaders import PyPDFLoader
|
| 7 |
|
| 8 |
# Import embeddings module from langchain for vector representations of text
|
|
|
|
| 24 |
from langchain.chains import ConversationChain
|
| 25 |
from langchain.chains.llm import LLMChain
|
| 26 |
from langchain.prompts import PromptTemplate
|
| 27 |
+
from langchain.memory import ConversationBufferMemory, VectorStoreRetrieverMemory
|
| 28 |
from langchain.chains import ConversationalRetrievalChain
|
| 29 |
|
| 30 |
from langchain.callbacks.manager import CallbackManager
|
| 31 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 32 |
|
| 33 |
+
|
| 34 |
|
| 35 |
def sanitize_collection_name(email):
|
| 36 |
# Replace invalid characters with an underscore
|
|
|
|
| 48 |
|
| 49 |
# Modify vectordb initialization to be dynamic based on user_id
|
| 50 |
def get_vectordb_for_user(user_collection_name):
|
| 51 |
+
# Get Chromadb location
|
| 52 |
+
CHROMADB_LOC = os.getenv('CHROMADB_LOC')
|
| 53 |
+
|
| 54 |
vectordb = Chroma(
|
| 55 |
collection_name=user_collection_name,
|
| 56 |
embedding_function=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def pdf_to_vec(filename, user_collection_name):
|
| 63 |
+
|
| 64 |
+
# Get Chromadb location
|
| 65 |
+
CHROMADB_LOC = os.getenv('CHROMADB_LOC')
|
| 66 |
+
|
| 67 |
document = []
|
| 68 |
loader = PyPDFLoader(filename)
|
| 69 |
document.extend(loader.load()) #which library is this from?
|
|
|
|
| 118 |
|
| 119 |
#step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
|
| 120 |
def default_chain(llm, user_collection_name):
|
| 121 |
+
# Get Chromadb location
|
| 122 |
+
CHROMADB_LOC = os.getenv('CHROMADB_LOC')
|
| 123 |
+
|
| 124 |
vectordb = get_vectordb_for_user(user_collection_name) # Use the dynamic vectordb based on user_id
|
| 125 |
sum_template = """
|
| 126 |
As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
|
|
|
|
| 172 |
for p_info in prompt_infos:
|
| 173 |
name = p_info["name"]
|
| 174 |
prompt_template = p_info["prompt_template"]
|
| 175 |
+
#vectordb=p_info["vector"]
|
| 176 |
prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
|
| 177 |
+
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') #new
|
| 178 |
+
vectordb= Chroma(persist_directory = CHROMADB_LOC, embedding_function = embeddings) #new
|
| 179 |
+
retriever = vectordb.as_retriever()#new
|
| 180 |
+
memory = VectorStoreRetrieverMemory(retriever=retriever) #new
|
| 181 |
+
|
| 182 |
+
chain = LLMChain(llm=llm, prompt=prompt, verbose=True, memory=memory) #new memory=memory
|
| 183 |
destination_chains[name] = chain
|
| 184 |
+
|
| 185 |
#default_chain = ConversationChain(llm=llm, output_key="text")
|
| 186 |
#memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
|
| 187 |
|
app/utils/db.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
|
|
|
|
| 2 |
from tinydb import TinyDB, Query, where
|
| 3 |
from tinydb.storages import MemoryStorage
|
| 4 |
import chromadb
|
|
@@ -7,7 +8,6 @@ from keras_facenet import FaceNet
|
|
| 7 |
from typing import Any
|
| 8 |
from datetime import datetime, timedelta
|
| 9 |
|
| 10 |
-
CHROMADB_LOC = "/home/user/data/chromadb"
|
| 11 |
|
| 12 |
class TinyDBHelper:
|
| 13 |
def __init__(self):
|
|
@@ -77,4 +77,4 @@ class ChromaDBFaceHelper:
|
|
| 77 |
|
| 78 |
# Initialize these helpers globally if they are to be used across multiple modules
|
| 79 |
tinydb_helper = TinyDBHelper()
|
| 80 |
-
chromadb_face_helper = ChromaDBFaceHelper(CHROMADB_LOC) # Initialization requires db_path
|
|
|
|
| 1 |
|
| 2 |
+
import os
|
| 3 |
from tinydb import TinyDB, Query, where
|
| 4 |
from tinydb.storages import MemoryStorage
|
| 5 |
import chromadb
|
|
|
|
| 8 |
from typing import Any
|
| 9 |
from datetime import datetime, timedelta
|
| 10 |
|
|
|
|
| 11 |
|
| 12 |
class TinyDBHelper:
|
| 13 |
def __init__(self):
|
|
|
|
| 77 |
|
| 78 |
# Initialize these helpers globally if they are to be used across multiple modules
|
| 79 |
tinydb_helper = TinyDBHelper()
|
| 80 |
+
chromadb_face_helper = ChromaDBFaceHelper(os.getenv('CHROMADB_LOC')) # Initialization requires db_path
|
docker-compose.yml
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
version: '3.8'
|
| 2 |
-
services:
|
| 3 |
-
educonnect:
|
| 4 |
-
image: python:3.9
|
| 5 |
-
command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
|
| 6 |
-
volumes:
|
| 7 |
-
- /Users/tyago/Workspace/EduConnect:/home/user/app
|
| 8 |
-
ports:
|
| 9 |
-
- "7860:7860"
|
| 10 |
-
environment:
|
| 11 |
-
- HOME=/home/user
|
| 12 |
-
- PATH=/home/user/.local/bin:$PATH
|
| 13 |
-
- NAME=EduConnect
|
| 14 |
-
user: "1000:1000"
|
| 15 |
-
working_dir: /home/user/app
|
| 16 |
-
env_file:
|
| 17 |
-
- .env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
entrypoint.sh
CHANGED
|
@@ -1,18 +1,19 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
|
|
|
|
|
|
| 3 |
# Authenticate with Hugging Face
|
| 4 |
export HF_HOME=/home/user/data/hf_cache
|
| 5 |
-
|
| 6 |
-
# Assuming HF_TOKEN is already exported to the environment
|
| 7 |
echo "Using Hugging Face API token for authentication"
|
| 8 |
|
| 9 |
# Navigate to the directory where download_model.py is located
|
| 10 |
echo "Determining model path..."
|
| 11 |
-
cd /home/user/app/
|
|
|
|
| 12 |
|
| 13 |
# Execute the download_model script
|
| 14 |
python download_model.py || { echo "Model download failed"; exit 1; }
|
| 15 |
-
|
| 16 |
# Read the model path from model_path.txt and export it
|
| 17 |
MODEL_PATH=$(cat /home/user/data/models/model_path.txt)
|
| 18 |
export MODEL_PATH
|
|
@@ -20,6 +21,8 @@ echo "@ Entrypoint - MODEL_PATH exported=${MODEL_PATH}"
|
|
| 20 |
|
| 21 |
# Navigate back to the app directory
|
| 22 |
cd /home/user/app
|
|
|
|
| 23 |
|
| 24 |
# Execute the main command of the container
|
|
|
|
| 25 |
exec "$@"
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
+
echo "Starting entrypoint script..."
|
| 4 |
+
|
| 5 |
# Authenticate with Hugging Face
|
| 6 |
export HF_HOME=/home/user/data/hf_cache
|
|
|
|
|
|
|
| 7 |
echo "Using Hugging Face API token for authentication"
|
| 8 |
|
| 9 |
# Navigate to the directory where download_model.py is located
|
| 10 |
echo "Determining model path..."
|
| 11 |
+
cd /home/user/app/utils
|
| 12 |
+
echo "Current directory for model download: $(pwd)"
|
| 13 |
|
| 14 |
# Execute the download_model script
|
| 15 |
python download_model.py || { echo "Model download failed"; exit 1; }
|
| 16 |
+
|
| 17 |
# Read the model path from model_path.txt and export it
|
| 18 |
MODEL_PATH=$(cat /home/user/data/models/model_path.txt)
|
| 19 |
export MODEL_PATH
|
|
|
|
| 21 |
|
| 22 |
# Navigate back to the app directory
|
| 23 |
cd /home/user/app
|
| 24 |
+
echo "Returning to app directory: $(pwd)"
|
| 25 |
|
| 26 |
# Execute the main command of the container
|
| 27 |
+
echo "Executing main command..."
|
| 28 |
exec "$@"
|
requirements.txt
CHANGED
|
@@ -18,4 +18,5 @@ tinydb # The in memory database for storing JWT tokens
|
|
| 18 |
langchain # LangChain for RAG
|
| 19 |
llama-cpp-python # To load the model
|
| 20 |
sentence-transformers # For text embeddings
|
| 21 |
-
pypdf # Handling PDF files
|
|
|
|
|
|
| 18 |
langchain # LangChain for RAG
|
| 19 |
llama-cpp-python # To load the model
|
| 20 |
sentence-transformers # For text embeddings
|
| 21 |
+
pypdf # Handling PDF files
|
| 22 |
+
langchain-community # Latest library to prevent deprecation warnings
|