Spaces:
Running
Running
Farid Karimli
committed on
Commit
·
527151b
1
Parent(s):
c028257
Retarget to Spring 25
Browse files
- .github/workflows/code_quality_check.yml +23 -23
- .github/workflows/deploy_to_hf.yml +21 -0
- .gitignore +2 -0
- apps/ai_tutor/chainlit_app.py +8 -8
- apps/ai_tutor/config/config.yml +30 -30
- apps/ai_tutor/config/project_config.yml +43 -3
- apps/ai_tutor/public/files/students_encrypted.json +21 -1
- apps/ai_tutor/storage/data/urls.txt +1 -1
.github/workflows/code_quality_check.yml
CHANGED
|
@@ -2,32 +2,32 @@ name: Code Quality and Security Checks
|
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
-
branches: [
|
| 6 |
pull_request:
|
| 7 |
-
branches: [
|
| 8 |
|
| 9 |
jobs:
|
| 10 |
code-quality:
|
| 11 |
runs-on: ubuntu-latest
|
| 12 |
steps:
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
+
branches: [main]
|
| 6 |
pull_request:
|
| 7 |
+
branches: [main]
|
| 8 |
|
| 9 |
jobs:
|
| 10 |
code-quality:
|
| 11 |
runs-on: ubuntu-latest
|
| 12 |
steps:
|
| 13 |
+
- uses: actions/checkout@v3
|
| 14 |
+
|
| 15 |
+
- name: Set up Python
|
| 16 |
+
uses: actions/setup-python@v4
|
| 17 |
+
with:
|
| 18 |
+
python-version: "3.11"
|
| 19 |
+
|
| 20 |
+
- name: Install dependencies
|
| 21 |
+
run: |
|
| 22 |
+
python -m pip install --upgrade pip
|
| 23 |
+
pip install flake8 black bandit
|
| 24 |
+
|
| 25 |
+
- name: Run Black
|
| 26 |
+
run: black --check .
|
| 27 |
+
|
| 28 |
+
- name: Run Flake8
|
| 29 |
+
run: flake8 .
|
| 30 |
+
|
| 31 |
+
- name: Run Bandit
|
| 32 |
+
run: |
|
| 33 |
+
bandit -r .
|
.github/workflows/deploy_to_hf.yml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Push Production to HuggingFace
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
|
| 7 |
+
# run this workflow manually from the Actions tab
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
sync-to-hub:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
steps:
|
| 14 |
+
- uses: actions/checkout@v4
|
| 15 |
+
with:
|
| 16 |
+
fetch-depth: 0
|
| 17 |
+
lfs: true
|
| 18 |
+
- name: Deploy Production (main) to HuggingFace
|
| 19 |
+
env:
|
| 20 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 21 |
+
run: git push --force https://faridkarimli:$HF_TOKEN@huggingface.co/spaces/dl4ds/sp25_tutor
|
.gitignore
CHANGED
|
@@ -10,3 +10,5 @@ vectorstores/*
|
|
| 10 |
*.log
|
| 11 |
**/.files/*
|
| 12 |
.env
|
|
|
|
|
|
|
|
|
| 10 |
*.log
|
| 11 |
**/.files/*
|
| 12 |
.env
|
| 13 |
+
.venv/*
|
| 14 |
+
.venv
|
apps/ai_tutor/chainlit_app.py
CHANGED
|
@@ -239,23 +239,23 @@ class Chatbot:
|
|
| 239 |
print(e)
|
| 240 |
return [
|
| 241 |
cl.Starter(
|
| 242 |
-
label="
|
| 243 |
-
message="
|
| 244 |
icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
|
| 245 |
),
|
| 246 |
cl.Starter(
|
| 247 |
-
label="
|
| 248 |
-
message="
|
| 249 |
icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
|
| 250 |
),
|
| 251 |
cl.Starter(
|
| 252 |
-
label="
|
| 253 |
-
message="
|
| 254 |
icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
|
| 255 |
),
|
| 256 |
cl.Starter(
|
| 257 |
-
label="
|
| 258 |
-
message="
|
| 259 |
icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
|
| 260 |
),
|
| 261 |
]
|
|
|
|
| 239 |
print(e)
|
| 240 |
return [
|
| 241 |
cl.Starter(
|
| 242 |
+
label="What is this class about?",
|
| 243 |
+
message="What is this class about?",
|
| 244 |
icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
|
| 245 |
),
|
| 246 |
cl.Starter(
|
| 247 |
+
label="What is the schedule?",
|
| 248 |
+
message="What is the schedule?",
|
| 249 |
icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
|
| 250 |
),
|
| 251 |
cl.Starter(
|
| 252 |
+
label="Who are the instructors?",
|
| 253 |
+
message="Who are the instructors?",
|
| 254 |
icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
|
| 255 |
),
|
| 256 |
cl.Starter(
|
| 257 |
+
label="Will we learn about Transformers?",
|
| 258 |
+
message="Will we learn about Transformers?",
|
| 259 |
icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
|
| 260 |
),
|
| 261 |
]
|
apps/ai_tutor/config/config.yml
CHANGED
|
@@ -1,22 +1,22 @@
|
|
| 1 |
-
log_dir:
|
| 2 |
-
log_chunk_dir:
|
| 3 |
-
device:
|
| 4 |
|
| 5 |
vectorstore:
|
| 6 |
-
load_from_HF:
|
| 7 |
reparse_files: True # bool
|
| 8 |
-
data_path:
|
| 9 |
-
url_file_path:
|
| 10 |
expand_urls: True # bool
|
| 11 |
-
db_option
|
| 12 |
-
db_path
|
| 13 |
-
model
|
| 14 |
-
search_top_k
|
| 15 |
-
score_threshold
|
| 16 |
|
| 17 |
faiss_params: # Not used as of now
|
| 18 |
-
index_path:
|
| 19 |
-
index_type:
|
| 20 |
index_dimension: 384 # int
|
| 21 |
index_nlist: 100 # int
|
| 22 |
index_nprobe: 10 # int
|
|
@@ -24,37 +24,37 @@ vectorstore:
|
|
| 24 |
colbert_params:
|
| 25 |
index_name: "new_idx" # str
|
| 26 |
|
| 27 |
-
llm_params:
|
| 28 |
-
llm_arch:
|
| 29 |
use_history: True # bool
|
| 30 |
generate_follow_up: False # bool
|
| 31 |
memory_window: 3 # int
|
| 32 |
-
llm_style:
|
| 33 |
-
llm_loader:
|
| 34 |
openai_params:
|
| 35 |
temperature: 0.7 # float
|
| 36 |
local_llm_params:
|
| 37 |
temperature: 0.7 # float
|
| 38 |
-
repo_id:
|
| 39 |
-
filename:
|
| 40 |
-
model_path:
|
| 41 |
stream: False # bool
|
| 42 |
-
pdf_reader:
|
| 43 |
|
| 44 |
chat_logging:
|
| 45 |
log_chat: True # bool
|
| 46 |
-
platform:
|
| 47 |
callbacks: True # bool
|
| 48 |
|
| 49 |
splitter_options:
|
| 50 |
use_splitter: True # bool
|
| 51 |
-
split_by_token
|
| 52 |
remove_leftover_delimiters: True # bool
|
| 53 |
remove_chunks: False # bool
|
| 54 |
-
chunking_mode:
|
| 55 |
-
chunk_size
|
| 56 |
-
chunk_overlap
|
| 57 |
-
chunk_separators
|
| 58 |
-
front_chunks_to_remove
|
| 59 |
-
last_chunks_to_remove
|
| 60 |
-
delimiters_to_remove
|
|
|
|
| 1 |
+
log_dir: "storage/logs" # str
|
| 2 |
+
log_chunk_dir: "storage/logs/chunks" # str
|
| 3 |
+
device: "cpu" # str [cuda, cpu]
|
| 4 |
|
| 5 |
vectorstore:
|
| 6 |
+
load_from_HF: False # bool
|
| 7 |
reparse_files: True # bool
|
| 8 |
+
data_path: "storage/data" # str
|
| 9 |
+
url_file_path: "storage/data/urls.txt" # str
|
| 10 |
expand_urls: True # bool
|
| 11 |
+
db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
|
| 12 |
+
db_path: "vectorstores" # str
|
| 13 |
+
model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
|
| 14 |
+
search_top_k: 5 # int
|
| 15 |
+
score_threshold: 0.2 # float
|
| 16 |
|
| 17 |
faiss_params: # Not used as of now
|
| 18 |
+
index_path: "vectorstores/faiss.index" # str
|
| 19 |
+
index_type: "Flat" # str [Flat, HNSW, IVF]
|
| 20 |
index_dimension: 384 # int
|
| 21 |
index_nlist: 100 # int
|
| 22 |
index_nprobe: 10 # int
|
|
|
|
| 24 |
colbert_params:
|
| 25 |
index_name: "new_idx" # str
|
| 26 |
|
| 27 |
+
llm_params:
|
| 28 |
+
llm_arch: "langchain" # [langchain]
|
| 29 |
use_history: True # bool
|
| 30 |
generate_follow_up: False # bool
|
| 31 |
memory_window: 3 # int
|
| 32 |
+
llm_style: "Normal" # str [Normal, ELI5]
|
| 33 |
+
llm_loader: "gpt-4o-mini" # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
|
| 34 |
openai_params:
|
| 35 |
temperature: 0.7 # float
|
| 36 |
local_llm_params:
|
| 37 |
temperature: 0.7 # float
|
| 38 |
+
repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
|
| 39 |
+
filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
|
| 40 |
+
model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
|
| 41 |
stream: False # bool
|
| 42 |
+
pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
|
| 43 |
|
| 44 |
chat_logging:
|
| 45 |
log_chat: True # bool
|
| 46 |
+
platform: "literalai"
|
| 47 |
callbacks: True # bool
|
| 48 |
|
| 49 |
splitter_options:
|
| 50 |
use_splitter: True # bool
|
| 51 |
+
split_by_token: True # bool
|
| 52 |
remove_leftover_delimiters: True # bool
|
| 53 |
remove_chunks: False # bool
|
| 54 |
+
chunking_mode: "semantic" # str [fixed, semantic]
|
| 55 |
+
chunk_size: 1000 # int
|
| 56 |
+
chunk_overlap: 100 # int
|
| 57 |
+
chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
|
| 58 |
+
front_chunks_to_remove: null # int or None
|
| 59 |
+
last_chunks_to_remove: null # int or None
|
| 60 |
+
delimiters_to_remove: ['\t', '\n', " ", " "] # list of strings
|
apps/ai_tutor/config/project_config.yml
CHANGED
|
@@ -3,15 +3,55 @@ retriever:
|
|
| 3 |
RAGatouille: "XThomasBU/Colbert_Index"
|
| 4 |
|
| 5 |
metadata:
|
| 6 |
-
metadata_links:
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
token_config:
|
| 10 |
cooldown_time: 60
|
| 11 |
regen_time: 180
|
| 12 |
-
tokens_left:
|
| 13 |
all_time_tokens_allocated: 1000000
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
misc:
|
| 16 |
github_repo: "https://github.com/edubotics-ai/edubot-core"
|
| 17 |
docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
|
|
|
|
| 3 |
RAGatouille: "XThomasBU/Colbert_Index"
|
| 4 |
|
| 5 |
metadata:
|
| 6 |
+
metadata_links:
|
| 7 |
+
[
|
| 8 |
+
"https://dl4ds.github.io/sp2025/",
|
| 9 |
+
"https://dl4ds.github.io/sp2025/schedule/",
|
| 10 |
+
]
|
| 11 |
+
slide_base_link:
|
| 12 |
+
"https://dl4ds.github.io"
|
| 13 |
+
|
| 14 |
+
# Assignment base link is used to find the webpage where the assignment is described/posted
|
| 15 |
+
assignment_base_link: "https://tools4ds.github.io/fa2024/assignments/"
|
| 16 |
+
|
| 17 |
+
# Define content types - assignments, lectures, etc.
|
| 18 |
+
content_types:
|
| 19 |
+
- "lectures"
|
| 20 |
+
- "assignments"
|
| 21 |
+
- "discussion"
|
| 22 |
+
- "other"
|
| 23 |
+
|
| 24 |
+
# These need to be patterns from URLs that can be used to identify the type of content uniquely
|
| 25 |
+
lectures_pattern: "/lectures/"
|
| 26 |
+
assignments_pattern: "/assignments/"
|
| 27 |
+
discussion_pattern: "/discussion/"
|
| 28 |
+
project_pattern: "/project/"
|
| 29 |
+
|
| 30 |
+
# These are fields that can be extracted from the webpages of the course content
|
| 31 |
+
lecture_metadata_fields:
|
| 32 |
+
- "title"
|
| 33 |
+
- "tldr"
|
| 34 |
+
- "date"
|
| 35 |
+
- "lecture_recording"
|
| 36 |
+
- "suggested_readings"
|
| 37 |
+
|
| 38 |
+
assignment_metadata_fields:
|
| 39 |
+
- "title"
|
| 40 |
+
- "release_date"
|
| 41 |
+
- "due_date"
|
| 42 |
+
- "source_file"
|
| 43 |
|
| 44 |
token_config:
|
| 45 |
cooldown_time: 60
|
| 46 |
regen_time: 180
|
| 47 |
+
tokens_left: 50000
|
| 48 |
all_time_tokens_allocated: 1000000
|
| 49 |
|
| 50 |
+
content:
|
| 51 |
+
notebookheaders_to_split_on:
|
| 52 |
+
- ["##", "Section"]
|
| 53 |
+
- ["#", "Title"]
|
| 54 |
+
|
| 55 |
misc:
|
| 56 |
github_repo: "https://github.com/edubotics-ai/edubot-core"
|
| 57 |
docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
|
apps/ai_tutor/public/files/students_encrypted.json
CHANGED
|
@@ -1 +1,21 @@
|
|
| 1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": [
|
| 3 |
+
"admin",
|
| 4 |
+
"student",
|
| 5 |
+
"bu"
|
| 6 |
+
],
|
| 7 |
+
"0bf8b6cca820bd8628a31d8d44a7b94fcd6d058c9d5a0c52b7ffdf01ac5ce310": [
|
| 8 |
+
"student",
|
| 9 |
+
"bu"
|
| 10 |
+
],
|
| 11 |
+
"0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": [
|
| 12 |
+
"admin",
|
| 13 |
+
"instructor",
|
| 14 |
+
"bu"
|
| 15 |
+
],
|
| 16 |
+
"a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": [
|
| 17 |
+
"admin",
|
| 18 |
+
"instructor",
|
| 19 |
+
"bu"
|
| 20 |
+
]
|
| 21 |
+
}
|
apps/ai_tutor/storage/data/urls.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
https://dl4ds.github.io/
|
|
|
|
| 1 |
+
https://dl4ds.github.io/sp2025/
|