Spaces:
Sleeping
Sleeping
github-actions commited on
Commit ·
9826f0b
0
Parent(s):
Deploy to Spaces with Xet
Browse files- .gitattributes +2 -0
- .github/workflows/hf_sync.yml +59 -0
- .github/workflows/run_model_tests.yml +44 -0
- .github/workflows/run_tests.yml +39 -0
- .gitignore +211 -0
- .python-version +1 -0
- LICENSE +21 -0
- README.md +99 -0
- anime.db +3 -0
- app.py +51 -0
- backend.py +100 -0
- constants.py +15 -0
- create_db.py +94 -0
- data/.gitkeep +0 -0
- genrelist.txt +16 -0
- main.py +6 -0
- pyproject.toml +14 -0
- requirements.txt +85 -0
- retrieval_utils.py +78 -0
- static/css/gradiomain.css +240 -0
- static/css/theme.css +35 -0
- static/images/background.png +3 -0
- static/images/sidebar.jpg +3 -0
- test_detect_genre.py +11 -0
- tests/test_chat_models.py +48 -0
- tests/test_retrieval_utils.py +163 -0
- uv.lock +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
static/images/* filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
anime.db filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/hf_sync.yml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to HuggingFace Spaces
|
| 2 |
+
on:
|
| 3 |
+
workflow_run:
|
| 4 |
+
workflows: ["Run All Unit Tests", "Run Model Tests on Production Branch"]
|
| 5 |
+
types:
|
| 6 |
+
- completed
|
| 7 |
+
branches: [ "production" ]
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
sync-to-hub:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
|
| 14 |
+
# Only run if the triggering workflows succeeded or when we run it manually from GH Actions UI
|
| 15 |
+
if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
|
| 16 |
+
|
| 17 |
+
steps:
|
| 18 |
+
- uses: actions/checkout@v3
|
| 19 |
+
with:
|
| 20 |
+
ref: production
|
| 21 |
+
fetch-depth: 0
|
| 22 |
+
lfs: true
|
| 23 |
+
- name: Install xet
|
| 24 |
+
run: |
|
| 25 |
+
curl --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/huggingface/xet-core/refs/heads/main/git_xet/install.sh | sh
|
| 26 |
+
echo "$HOME/.xet/bin" >> $GITHUB_PATH
|
| 27 |
+
- name: Set git user for xet security
|
| 28 |
+
run: |
|
| 29 |
+
git config user.name "github-actions"
|
| 30 |
+
git config user.email "actions@github.com"
|
| 31 |
+
- name: Convert binaries to xet and Push to HuggingFace
|
| 32 |
+
env:
|
| 33 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 34 |
+
run: |
|
| 35 |
+
# 1. Install Xet
|
| 36 |
+
git xet install
|
| 37 |
+
|
| 38 |
+
# 2. Create a fresh, empty branch for deployment
|
| 39 |
+
# This disconnects from previous history, removing the "binary in history" error
|
| 40 |
+
git checkout --orphan deployment-branch
|
| 41 |
+
|
| 42 |
+
# 3. Unstage everything so we can re-add them properly with Xet
|
| 43 |
+
git reset
|
| 44 |
+
|
| 45 |
+
# 4. Configure Xet tracking
|
| 46 |
+
# We do this *before* adding files so Xet handles them correctly
|
| 47 |
+
git xet track "anime.db" "static/images/*"
|
| 48 |
+
|
| 49 |
+
# 5. Add all files
|
| 50 |
+
# Since we are in a fresh branch, this adds everything currently on disk
|
| 51 |
+
git add .
|
| 52 |
+
|
| 53 |
+
# 6. Commit and Force Push
|
| 54 |
+
git config user.name "github-actions"
|
| 55 |
+
git config user.email "actions@github.com"
|
| 56 |
+
git commit -m "Deploy to Spaces with Xet"
|
| 57 |
+
|
| 58 |
+
# Force push to 'main' on Hugging Face
|
| 59 |
+
git push --force https://MLDevOps:$HF_TOKEN@huggingface.co/spaces/MLDevOps/CS553_CaseStudy1 HEAD:main
|
.github/workflows/run_model_tests.yml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Model Tests on Production Branch
|
| 2 |
+
|
| 3 |
+
# Run on every push and pull request
|
| 4 |
+
on:
|
| 5 |
+
push:
|
| 6 |
+
branches: [ "production" ]
|
| 7 |
+
workflow_dispatch:
|
| 8 |
+
|
| 9 |
+
# Run the following Jobs (just 1)
|
| 10 |
+
jobs:
|
| 11 |
+
|
| 12 |
+
# Run the "run_test" job
|
| 13 |
+
run_test:
|
| 14 |
+
|
| 15 |
+
# It should run on latest ubuntu OS
|
| 16 |
+
runs-on: ubuntu-latest
|
| 17 |
+
|
| 18 |
+
# It should use the following steps
|
| 19 |
+
steps:
|
| 20 |
+
|
| 21 |
+
# Checkout the current repository
|
| 22 |
+
- uses: actions/checkout@v3
|
| 23 |
+
with:
|
| 24 |
+
ref: production
|
| 25 |
+
fetch-depth: 0
|
| 26 |
+
lfs: true
|
| 27 |
+
|
| 28 |
+
# Install Python with 3.12 version
|
| 29 |
+
- uses: actions/setup-python@v5
|
| 30 |
+
with:
|
| 31 |
+
python-version: "3.12"
|
| 32 |
+
|
| 33 |
+
# Install all required python libraries
|
| 34 |
+
- name: Install Required Libraries
|
| 35 |
+
run: |
|
| 36 |
+
pip install -r requirements.txt
|
| 37 |
+
|
| 38 |
+
# Run all the unit tests quietly
|
| 39 |
+
- name: Run Unit Test for Chat Models (Quietly)
|
| 40 |
+
env:
|
| 41 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 42 |
+
PYTHONPATH: ${{ github.workspace }}
|
| 43 |
+
run: |
|
| 44 |
+
pytest -q tests/test_chat_models.py
|
.github/workflows/run_tests.yml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run All Unit Tests
|
| 2 |
+
|
| 3 |
+
# Run on every push and pull request
|
| 4 |
+
on:
|
| 5 |
+
push:
|
| 6 |
+
workflow_dispatch:
|
| 7 |
+
|
| 8 |
+
# Run the following Jobs (just 1)
|
| 9 |
+
jobs:
|
| 10 |
+
|
| 11 |
+
# Run the "run_test" job
|
| 12 |
+
run_test:
|
| 13 |
+
|
| 14 |
+
# It should run on latest ubuntu OS
|
| 15 |
+
runs-on: ubuntu-latest
|
| 16 |
+
|
| 17 |
+
# It should use the following steps
|
| 18 |
+
steps:
|
| 19 |
+
|
| 20 |
+
# Checkout the current repository
|
| 21 |
+
- uses: actions/checkout@v3
|
| 22 |
+
|
| 23 |
+
# Install Python with 3.12 version
|
| 24 |
+
- uses: actions/setup-python@v5
|
| 25 |
+
with:
|
| 26 |
+
python-version: "3.12"
|
| 27 |
+
|
| 28 |
+
# Install all required python libraries
|
| 29 |
+
- name: Install Required Libraries
|
| 30 |
+
run: |
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
|
| 33 |
+
# Run all the unit tests quietly
|
| 34 |
+
- name: Run all Unit Tests (Quietly)
|
| 35 |
+
env:
|
| 36 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 37 |
+
PYTHONPATH: ${{ github.workspace }}
|
| 38 |
+
run: |
|
| 39 |
+
pytest -q --ignore=tests/test_chat_models.py
|
.gitignore
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore CSV files
|
| 2 |
+
*.csv
|
| 3 |
+
|
| 4 |
+
# Byte-compiled / optimized / DLL files
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.py[codz]
|
| 7 |
+
*$py.class
|
| 8 |
+
|
| 9 |
+
# C extensions
|
| 10 |
+
*.so
|
| 11 |
+
|
| 12 |
+
# Distribution / packaging
|
| 13 |
+
.Python
|
| 14 |
+
build/
|
| 15 |
+
develop-eggs/
|
| 16 |
+
dist/
|
| 17 |
+
downloads/
|
| 18 |
+
eggs/
|
| 19 |
+
.eggs/
|
| 20 |
+
lib/
|
| 21 |
+
lib64/
|
| 22 |
+
parts/
|
| 23 |
+
sdist/
|
| 24 |
+
var/
|
| 25 |
+
wheels/
|
| 26 |
+
share/python-wheels/
|
| 27 |
+
*.egg-info/
|
| 28 |
+
.installed.cfg
|
| 29 |
+
*.egg
|
| 30 |
+
MANIFEST
|
| 31 |
+
|
| 32 |
+
# PyInstaller
|
| 33 |
+
# Usually these files are written by a python script from a template
|
| 34 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 35 |
+
*.manifest
|
| 36 |
+
*.spec
|
| 37 |
+
|
| 38 |
+
# Installer logs
|
| 39 |
+
pip-log.txt
|
| 40 |
+
pip-delete-this-directory.txt
|
| 41 |
+
|
| 42 |
+
# Unit test / coverage reports
|
| 43 |
+
htmlcov/
|
| 44 |
+
.tox/
|
| 45 |
+
.nox/
|
| 46 |
+
.coverage
|
| 47 |
+
.coverage.*
|
| 48 |
+
.cache
|
| 49 |
+
nosetests.xml
|
| 50 |
+
coverage.xml
|
| 51 |
+
*.cover
|
| 52 |
+
*.py.cover
|
| 53 |
+
.hypothesis/
|
| 54 |
+
.pytest_cache/
|
| 55 |
+
cover/
|
| 56 |
+
|
| 57 |
+
# Translations
|
| 58 |
+
*.mo
|
| 59 |
+
*.pot
|
| 60 |
+
|
| 61 |
+
# Django stuff:
|
| 62 |
+
*.log
|
| 63 |
+
local_settings.py
|
| 64 |
+
db.sqlite3
|
| 65 |
+
db.sqlite3-journal
|
| 66 |
+
|
| 67 |
+
# Flask stuff:
|
| 68 |
+
instance/
|
| 69 |
+
.webassets-cache
|
| 70 |
+
|
| 71 |
+
# Scrapy stuff:
|
| 72 |
+
.scrapy
|
| 73 |
+
|
| 74 |
+
# Sphinx documentation
|
| 75 |
+
docs/_build/
|
| 76 |
+
|
| 77 |
+
# PyBuilder
|
| 78 |
+
.pybuilder/
|
| 79 |
+
target/
|
| 80 |
+
|
| 81 |
+
# Jupyter Notebook
|
| 82 |
+
.ipynb_checkpoints
|
| 83 |
+
|
| 84 |
+
# IPython
|
| 85 |
+
profile_default/
|
| 86 |
+
ipython_config.py
|
| 87 |
+
|
| 88 |
+
# pyenv
|
| 89 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 90 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 91 |
+
# .python-version
|
| 92 |
+
|
| 93 |
+
# pipenv
|
| 94 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 95 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 96 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 97 |
+
# install all needed dependencies.
|
| 98 |
+
#Pipfile.lock
|
| 99 |
+
|
| 100 |
+
# UV
|
| 101 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 102 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 103 |
+
# commonly ignored for libraries.
|
| 104 |
+
#uv.lock
|
| 105 |
+
|
| 106 |
+
# poetry
|
| 107 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 108 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 109 |
+
# commonly ignored for libraries.
|
| 110 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 111 |
+
#poetry.lock
|
| 112 |
+
#poetry.toml
|
| 113 |
+
|
| 114 |
+
# pdm
|
| 115 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 116 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 117 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 118 |
+
#pdm.lock
|
| 119 |
+
#pdm.toml
|
| 120 |
+
.pdm-python
|
| 121 |
+
.pdm-build/
|
| 122 |
+
|
| 123 |
+
# pixi
|
| 124 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 125 |
+
#pixi.lock
|
| 126 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 127 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 128 |
+
.pixi
|
| 129 |
+
|
| 130 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 131 |
+
__pypackages__/
|
| 132 |
+
|
| 133 |
+
# Celery stuff
|
| 134 |
+
celerybeat-schedule
|
| 135 |
+
celerybeat.pid
|
| 136 |
+
|
| 137 |
+
# SageMath parsed files
|
| 138 |
+
*.sage.py
|
| 139 |
+
|
| 140 |
+
# Environments
|
| 141 |
+
.env
|
| 142 |
+
.envrc
|
| 143 |
+
.venv
|
| 144 |
+
env/
|
| 145 |
+
venv/
|
| 146 |
+
ENV/
|
| 147 |
+
env.bak/
|
| 148 |
+
venv.bak/
|
| 149 |
+
|
| 150 |
+
# Spyder project settings
|
| 151 |
+
.spyderproject
|
| 152 |
+
.spyproject
|
| 153 |
+
|
| 154 |
+
# Rope project settings
|
| 155 |
+
.ropeproject
|
| 156 |
+
|
| 157 |
+
# mkdocs documentation
|
| 158 |
+
/site
|
| 159 |
+
|
| 160 |
+
# mypy
|
| 161 |
+
.mypy_cache/
|
| 162 |
+
.dmypy.json
|
| 163 |
+
dmypy.json
|
| 164 |
+
|
| 165 |
+
# Pyre type checker
|
| 166 |
+
.pyre/
|
| 167 |
+
|
| 168 |
+
# pytype static type analyzer
|
| 169 |
+
.pytype/
|
| 170 |
+
|
| 171 |
+
# Cython debug symbols
|
| 172 |
+
cython_debug/
|
| 173 |
+
|
| 174 |
+
# PyCharm
|
| 175 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 176 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 177 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 178 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 179 |
+
.idea/
|
| 180 |
+
.venv/
|
| 181 |
+
|
| 182 |
+
# Abstra
|
| 183 |
+
# Abstra is an AI-powered process automation framework.
|
| 184 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 185 |
+
# Learn more at https://abstra.io/docs
|
| 186 |
+
.abstra/
|
| 187 |
+
|
| 188 |
+
# Visual Studio Code
|
| 189 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 190 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 191 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 192 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 193 |
+
# .vscode/
|
| 194 |
+
|
| 195 |
+
# Ruff stuff:
|
| 196 |
+
.ruff_cache/
|
| 197 |
+
|
| 198 |
+
# PyPI configuration file
|
| 199 |
+
.pypirc
|
| 200 |
+
|
| 201 |
+
# Cursor
|
| 202 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 203 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 204 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 205 |
+
.cursorignore
|
| 206 |
+
.cursorindexingignore
|
| 207 |
+
|
| 208 |
+
# Marimo
|
| 209 |
+
marimo/_static/
|
| 210 |
+
marimo/_lsp/
|
| 211 |
+
__marimo__/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 ShafathZ
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CS553 CaseStudy1
|
| 3 |
+
emoji: 💬
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 6.5.1
|
| 8 |
+
python_version: 3.12.3
|
| 9 |
+
app_file: app.py
|
| 10 |
+
pinned: false
|
| 11 |
+
hf_oauth: true
|
| 12 |
+
hf_oauth_scopes:
|
| 13 |
+
- inference-api
|
| 14 |
+
license: mit
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
An Anime Recommendation chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
| 18 |
+
|
| 19 |
+
## Models Used by our Chatbot
|
| 20 |
+
| Type of Model | Model Name (Hugging Face Path) |
|
| 21 |
+
|---------------|--------------------------------|
|
| 22 |
+
| Local Model | `Qwen/Qwen3-0.6B` |
|
| 23 |
+
| Inference Client Model| `openai/gpt-oss-20b` |
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
## Working with UV (Ultra-Violet)
|
| 27 |
+
### Install UV
|
| 28 |
+
Please download `uv` (Ultra-Violet) for Python Project Dependency Management: https://docs.astral.sh/uv/getting-started/installation/#installation-methods
|
| 29 |
+
|
| 30 |
+
### Initializing a uv virtual env
|
| 31 |
+
Run following commands by navigating to the project directory:
|
| 32 |
+
```bash
|
| 33 |
+
cd /path/to/your/project
|
| 34 |
+
uv sync
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
### Activating the virtual env
|
| 38 |
+
In the same project directory, execute the following (if virtual env is not already active):
|
| 39 |
+
```bash
|
| 40 |
+
source .venv/bin/activate
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### Adding any Libraries / Dependencies
|
| 44 |
+
To add any new dependencies (libraries):
|
| 45 |
+
```bash
|
| 46 |
+
uv add <library_name>
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## Working with HuggingFace Spaces Locally
|
| 50 |
+
### Install Gradio with oAuth
|
| 51 |
+
Run the following command in your Python environment:
|
| 52 |
+
```bash
|
| 53 |
+
uv add "gradio[oauth]"
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
### Set up HuggingFace Token
|
| 57 |
+
1. Go to your HuggingFace profile at: https://huggingface.co/settings/tokens
|
| 58 |
+
2. Generate a new token for your HuggingFace Space at `Create New Token` -> `Fine-grained`.
|
| 59 |
+
3. Under `Repository permissions` section, search for the repo: "spaces/MLDevOps/CS553_CaseStudy1" and select it
|
| 60 |
+
4. Check the box for "Write access to contents/settings of selected repos" and click "Create Token" at the bottom.
|
| 61 |
+
5. Copy and Paste the generated token into a `.env` file in the root directory of your local copy of CS553_CaseStudy1 repo:
|
| 62 |
+
```
|
| 63 |
+
HF_TOKEN=XXXXXXXXX
|
| 64 |
+
```
|
| 65 |
+
6. Login into HF:
|
| 66 |
+
```bash
|
| 67 |
+
hf auth login
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### Running Gradio App on HuggingFace Spaces Locally
|
| 71 |
+
Run the following command:
|
| 72 |
+
```bash
|
| 73 |
+
python app.py
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
It will spit out logs indicating the url to open in browser:
|
| 77 |
+
```
|
| 78 |
+
...
|
| 79 |
+
* Running on local URL: http://127.0.0.1:7860
|
| 80 |
+
...
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
### Debugging Gradio Issue
|
| 84 |
+
In app.py, the line:
|
| 85 |
+
```python
|
| 86 |
+
chatbot = gr.ChatInterface(
|
| 87 |
+
respond,
|
| 88 |
+
type="messages",
|
| 89 |
+
...
|
| 90 |
+
)
|
| 91 |
+
```
|
| 92 |
+
might need to be changed to remove the type line as follows due to a deprecation issue on HuggingFace Spaces:
|
| 93 |
+
```python
|
| 94 |
+
chatbot = gr.ChatInterface(
|
| 95 |
+
respond,
|
| 96 |
+
...
|
| 97 |
+
)
|
| 98 |
+
```
|
| 99 |
+
With this, run the program and it should work locally on localhost server!
|
anime.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9f2c79bc2be7d84d3089640ea010361c4aea5eb4e580e17148f60e68337ced4
|
| 3 |
+
size 1294336
|
app.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import backend
|
| 4 |
+
from constants import *
|
| 5 |
+
|
| 6 |
+
theme_css = Path("static/css/theme.css").read_text() if Path("static/css/theme.css").exists() else ""
|
| 7 |
+
main_css = Path("static/css/gradiomain.css").read_text()
|
| 8 |
+
CSS = theme_css + "\n\n" + main_css
|
| 9 |
+
|
| 10 |
+
# Load static directory
|
| 11 |
+
gr.set_static_paths(paths=[Path.cwd().absolute()/"static"])
|
| 12 |
+
|
| 13 |
+
# Adapter function between frontend and backend. Returns a generator yielding backend results.
|
| 14 |
+
def respond(
|
| 15 |
+
message,
|
| 16 |
+
history: list[dict[str, str]],
|
| 17 |
+
use_local_model,
|
| 18 |
+
hf_token: gr.OAuthToken,
|
| 19 |
+
):
|
| 20 |
+
for r in backend.process_user_query(SYSTEM_PROMPT, history, message, use_local_model, MAX_TOKENS, TEMPERATURE, TOP_P, hf_token.token):
|
| 21 |
+
yield r
|
| 22 |
+
|
| 23 |
+
with gr.Blocks() as homepage:
|
| 24 |
+
gr.Markdown(
|
| 25 |
+
"""
|
| 26 |
+
# Ani<span style="font-size: 2rem;">ℤ</span>enith
|
| 27 |
+
An AI designed to give recommendations of the best anime options based on your preferences! Has knowledge of a full database of anime!
|
| 28 |
+
""",
|
| 29 |
+
elem_classes=["page-header"]
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
with gr.Sidebar():
|
| 33 |
+
gr.LoginButton()
|
| 34 |
+
|
| 35 |
+
local_model = gr.Checkbox(
|
| 36 |
+
label="Use Local Model?",
|
| 37 |
+
value=False,
|
| 38 |
+
elem_classes=["toggle-button"]
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# Main chatbot interface
|
| 42 |
+
chatbot = gr.ChatInterface(
|
| 43 |
+
respond,
|
| 44 |
+
additional_inputs=[
|
| 45 |
+
local_model,
|
| 46 |
+
],
|
| 47 |
+
)
|
| 48 |
+
chatbot.chatbot.elem_classes = ["custom-chatbot"]
|
| 49 |
+
|
| 50 |
+
if __name__ == "__main__":
|
| 51 |
+
homepage.launch(css=CSS)
|
backend.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from huggingface_hub import InferenceClient
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
from retrieval_utils import get_recommendations
|
| 5 |
+
|
| 6 |
+
genre_list = open("genrelist.txt", "r").read().splitlines()
|
| 7 |
+
|
| 8 |
+
def process_user_query(system_message: str, history: List[dict], user_message: str, use_local_model: bool, max_tokens: int, temperature: float, top_p: float, hf_token):
|
| 9 |
+
# 1. Retrieve genres from the user message using naive approach
|
| 10 |
+
genre_list = detect_genres(user_message)
|
| 11 |
+
|
| 12 |
+
# 2. Retrieve relevant results from DB if the genre_list is not empty
|
| 13 |
+
recommendations_string = ""
|
| 14 |
+
if len(genre_list) > 0:
|
| 15 |
+
recommendations_string = get_recommendations(genre_list)
|
| 16 |
+
|
| 17 |
+
# 3. Query the model
|
| 18 |
+
for result in query_model(system_message,
|
| 19 |
+
history,
|
| 20 |
+
user_message,
|
| 21 |
+
recommendations_string,
|
| 22 |
+
use_local_model,
|
| 23 |
+
max_tokens,
|
| 24 |
+
temperature,
|
| 25 |
+
top_p,
|
| 26 |
+
hf_token):
|
| 27 |
+
yield result
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def detect_genres(message: str) -> List[str]:
|
| 31 |
+
requested_genres = []
|
| 32 |
+
# Simple naive genre check by detecting if any of our system stored genres are within the user query
|
| 33 |
+
# TODO: Improve genre detection instead to use Retriever and RAG framework in the future
|
| 34 |
+
for genre in genre_list:
|
| 35 |
+
if message.__contains__(genre):
|
| 36 |
+
requested_genres.append(genre)
|
| 37 |
+
return requested_genres
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def query_model(
|
| 41 |
+
system_message: str,
|
| 42 |
+
history: List[dict],
|
| 43 |
+
user_message: str,
|
| 44 |
+
recommendations_string: str,
|
| 45 |
+
use_local_model: bool,
|
| 46 |
+
max_tokens: int, # TODO: Remove this and hardcode a value in constants.py
|
| 47 |
+
temperature: float, # TODO: Remove this and hardcode a value in constants.py
|
| 48 |
+
top_p: float, # TODO: Remove this and hardcode a value in constants.py
|
| 49 |
+
hf_token):
|
| 50 |
+
|
| 51 |
+
# Construct messages for the language model
|
| 52 |
+
# Start by adding system prompt
|
| 53 |
+
system_prompt = system_message
|
| 54 |
+
if recommendations_string:
|
| 55 |
+
system_prompt += "\nRECOMMENDATION JSON:" + f"\n{recommendations_string}"
|
| 56 |
+
messages = [{"role": "system", "content": system_prompt}]
|
| 57 |
+
|
| 58 |
+
# Add the rest of the history
|
| 59 |
+
messages.extend(history)
|
| 60 |
+
|
| 61 |
+
# Add the current user prompt
|
| 62 |
+
messages.append({"role": "user", "content": user_message})
|
| 63 |
+
|
| 64 |
+
# Determine which model to use (local or external)
|
| 65 |
+
if use_local_model:
|
| 66 |
+
# Local Model -- Uses pipeline from transformers library
|
| 67 |
+
pipeline_local_model = pipeline(task='text-generation',
|
| 68 |
+
model='Qwen/Qwen3-0.6B',
|
| 69 |
+
max_new_tokens=max_tokens,
|
| 70 |
+
temperature=temperature,
|
| 71 |
+
do_sample=False,
|
| 72 |
+
top_p=top_p
|
| 73 |
+
)
|
| 74 |
+
# Get the response from the local model
|
| 75 |
+
response = pipeline_local_model(messages)
|
| 76 |
+
|
| 77 |
+
# Parse the output and yield it
|
| 78 |
+
yield response[0]['generated_text'][-1]['content'].split('</think>')[-1].strip()
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
elif not use_local_model:
|
| 82 |
+
# Non-local Model -- Use InferenceClient
|
| 83 |
+
client = InferenceClient(
|
| 84 |
+
token=hf_token,
|
| 85 |
+
model="openai/gpt-oss-20b",
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
response = ""
|
| 89 |
+
for chunk in client.chat_completion(
|
| 90 |
+
messages=messages,
|
| 91 |
+
max_tokens=max_tokens,
|
| 92 |
+
stream=True,
|
| 93 |
+
temperature=temperature,
|
| 94 |
+
top_p=top_p,
|
| 95 |
+
):
|
| 96 |
+
if chunk.choices and chunk.choices[0].delta.content:
|
| 97 |
+
token = chunk.choices[0].delta.content
|
| 98 |
+
response += token
|
| 99 |
+
yield response
|
| 100 |
+
|
constants.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SYSTEM_PROMPT = f"""
|
| 2 |
+
You are an expert on recommending Anime shows. Please use the RECOMMENDATIONS to answer the user's question.
|
| 3 |
+
The RECOMMENDATIONS is a JSON String that contains information of top Anime sorted in descending order by:
|
| 4 |
+
1. Number of Requested Genre Matches from the User
|
| 5 |
+
2. The Score of the Anime
|
| 6 |
+
|
| 7 |
+
If the RECOMMENDATIONS JSON String is not given:
|
| 8 |
+
1. Then answer the question like a Friendly Chatbot!
|
| 9 |
+
2. Do not reference anything about a RECOMMENDATION JSON
|
| 10 |
+
3. Ask the user to provide their favorite genre(s) for Anime Recommendations
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
MAX_TOKENS = 2048
|
| 14 |
+
TEMPERATURE = 0.7
|
| 15 |
+
TOP_P = 0.7
|
create_db.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import sqlite3

# Load the Anime Dataset as a Pandas DataFrame.
df = pd.read_csv('data/anime-dataset-2023.csv')

# ---- Genre table ----
# One row per unique genre name found anywhere in the dataset.
genres_df = df['Genres'].str.split(', ').explode().str.strip().drop_duplicates().reset_index(drop=True)
genres_df = pd.DataFrame({'genre_id': range(1, len(genres_df) + 1), 'genre_name': genres_df})

# Exclude adult/unknown genres from the catalogue entirely.
excluded_genres = ['Hentai', 'UNKNOWN', 'Erotica', 'Ecchi']
genres_df = genres_df[~genres_df['genre_name'].isin(excluded_genres)]

# ---- Anime table ----
# Drop any anime whose genre string mentions an excluded genre.
# na=True makes rows with a missing genre string count as matches, so they
# are filtered out as well.
pattern = '|'.join(excluded_genres)
anime_df = df[~df['Genres'].str.contains(pattern, na=True)]

# Keep only rows that have an English name, a score, and a synopsis.
anime_df = anime_df[anime_df['English name'] != 'UNKNOWN']
anime_df = anime_df[anime_df['Score'] != 'UNKNOWN']
anime_df = anime_df[anime_df['Synopsis'] != 'No description available for this anime.']

# BUG FIX: 'Score' is read as text (the raw column mixes numbers with the
# sentinel 'UNKNOWN'), so sorting it directly was lexicographic — e.g.
# "9.5" ranked above "10.0". Convert to float first so the ranking, and
# the REAL/FLOAT column written to SQLite below, are numerically correct.
anime_df = anime_df.copy()
anime_df['Score'] = anime_df['Score'].astype(float)

# Sort by score and keep only the top 1000 anime.
anime_df = anime_df.sort_values(by='Score', ascending=False)
anime_df = anime_df.head(1000)

# Rename columns to match the SQLite schema defined below.
anime_df = anime_df.rename(columns={'English name': 'name', 'Score': 'score', 'Synopsis': 'synopsis'})

# ---- AnimeGenre join table ----
# Map each surviving anime to the ids of its genres.
genre_mapping = genres_df.set_index('genre_name')['genre_id'].to_dict()

anime_genre_df = anime_df[['anime_id', 'Genres']].copy()
anime_genre_df = anime_genre_df.assign(genre_name=anime_genre_df['Genres'].str.split(', ')).explode('genre_name')
anime_genre_df['genre_name'] = anime_genre_df['genre_name'].str.strip()
anime_genre_df['genre_id'] = anime_genre_df['genre_name'].map(genre_mapping)
# dropna discards genres that were excluded above (no id in the mapping).
anime_genre_df = anime_genre_df[['anime_id', 'genre_id']].dropna()
anime_genre_df['genre_id'] = anime_genre_df['genre_id'].astype(int)

# ---- Final clean up ----
# Keep only the columns the schema expects and align the id column names.
anime_df = anime_df[['anime_id', 'name', 'score', 'synopsis']]
anime_df = anime_df.rename(columns={'anime_id': 'id'})
genres_df = genres_df.rename(columns={'genre_id': 'id'})


SCHEMA_SQL = '''
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS Anime (
    id INTEGER PRIMARY KEY,
    name VARCHAR(50),
    score FLOAT,
    synopsis TEXT
);

CREATE TABLE IF NOT EXISTS Genre (
    id INTEGER PRIMARY KEY,
    genre_name VARCHAR(20)
);

CREATE TABLE IF NOT EXISTS AnimeGenre (
    anime_id INTEGER NOT NULL,
    genre_id INTEGER NOT NULL,
    PRIMARY KEY (anime_id, genre_id),
    FOREIGN KEY (anime_id) REFERENCES Anime(id) ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (genre_id) REFERENCES Genre(id) ON DELETE CASCADE ON UPDATE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_anime_id ON AnimeGenre(anime_id);
CREATE INDEX IF NOT EXISTS idx_genre_id ON AnimeGenre(genre_id);
'''

# Create the schema (idempotent) and refresh table contents.
# if_exists='delete_rows' (pandas >= 3.0) truncates then reloads, keeping
# the existing schema, foreign keys, and indexes intact.
with sqlite3.connect('anime.db') as conn:
    conn.executescript(SCHEMA_SQL)
    anime_df.to_sql('Anime', conn, if_exists='delete_rows', index=False, method='multi')
    genres_df.to_sql('Genre', conn, if_exists='delete_rows', index=False, method='multi')
    anime_genre_df.to_sql('AnimeGenre', conn, if_exists='delete_rows', index=False, method='multi')
|
data/.gitkeep
ADDED
|
File without changes
|
genrelist.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Action
|
| 2 |
+
Award Winning
|
| 3 |
+
Sci-Fi
|
| 4 |
+
Adventure
|
| 5 |
+
Drama
|
| 6 |
+
Mystery
|
| 7 |
+
Supernatural
|
| 8 |
+
Fantasy
|
| 9 |
+
Sports
|
| 10 |
+
Comedy
|
| 11 |
+
Romance
|
| 12 |
+
Slice of Life
|
| 13 |
+
Suspense
|
| 14 |
+
Gourmet
|
| 15 |
+
Avant Garde
|
| 16 |
+
Horror
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main() -> None:
    """Entry point: print the project greeting."""
    greeting = "Hello from cs553-casestudy1!"
    print(greeting)


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "cs553-casestudy1"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"gradio[oauth]>=6.5.1",
|
| 9 |
+
"huggingface-hub>=1.3.5",
|
| 10 |
+
"pytest>=9.0.2",
|
| 11 |
+
"itsdangerous>=2.2.0",
|
| 12 |
+
"torch>=2.10.0",
|
| 13 |
+
"transformers>=5.0.0",
|
| 14 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==24.1.0
|
| 2 |
+
annotated-doc==0.0.4
|
| 3 |
+
annotated-types==0.7.0
|
| 4 |
+
anyio==4.12.1
|
| 5 |
+
authlib==1.6.6
|
| 6 |
+
brotli==1.2.0
|
| 7 |
+
certifi==2026.1.4
|
| 8 |
+
cffi==2.0.0
|
| 9 |
+
click==8.3.1
|
| 10 |
+
cryptography==46.0.4
|
| 11 |
+
cuda-bindings==12.9.4
|
| 12 |
+
cuda-pathfinder==1.3.3
|
| 13 |
+
fastapi==0.128.0
|
| 14 |
+
ffmpy==1.0.0
|
| 15 |
+
filelock==3.20.3
|
| 16 |
+
fsspec==2026.1.0
|
| 17 |
+
gradio==6.5.1
|
| 18 |
+
gradio-client==2.0.3
|
| 19 |
+
groovy==0.1.2
|
| 20 |
+
h11==0.16.0
|
| 21 |
+
hf-xet==1.2.0
|
| 22 |
+
httpcore==1.0.9
|
| 23 |
+
httpx==0.28.1
|
| 24 |
+
huggingface-hub==1.3.7
|
| 25 |
+
idna==3.11
|
| 26 |
+
iniconfig==2.3.0
|
| 27 |
+
itsdangerous==2.2.0
|
| 28 |
+
jinja2==3.1.6
|
| 29 |
+
markdown-it-py==4.0.0
|
| 30 |
+
markupsafe==3.0.3
|
| 31 |
+
mdurl==0.1.2
|
| 32 |
+
mpmath==1.3.0
|
| 33 |
+
networkx==3.6.1
|
| 34 |
+
numpy==2.4.2
|
| 35 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 36 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 37 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 38 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 39 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 40 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 41 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 42 |
+
nvidia-curand-cu12==10.3.9.90
|
| 43 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 44 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 45 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 46 |
+
nvidia-nccl-cu12==2.27.5
|
| 47 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 48 |
+
nvidia-nvshmem-cu12==3.4.5
|
| 49 |
+
nvidia-nvtx-cu12==12.8.90
|
| 50 |
+
orjson==3.11.7
|
| 51 |
+
packaging==26.0
|
| 52 |
+
pandas==3.0.0
|
| 53 |
+
pillow==12.1.0
|
| 54 |
+
pluggy==1.6.0
|
| 55 |
+
pycparser==3.0
|
| 56 |
+
pydantic==2.12.5
|
| 57 |
+
pydantic-core==2.41.5
|
| 58 |
+
pydub==0.25.1
|
| 59 |
+
pygments==2.19.2
|
| 60 |
+
pytest==9.0.2
|
| 61 |
+
python-dateutil==2.9.0.post0
|
| 62 |
+
python-multipart==0.0.22
|
| 63 |
+
pytz==2025.2
|
| 64 |
+
pyyaml==6.0.3
|
| 65 |
+
regex==2026.1.15
|
| 66 |
+
rich==14.3.2
|
| 67 |
+
safehttpx==0.1.7
|
| 68 |
+
safetensors==0.7.0
|
| 69 |
+
semantic-version==2.10.0
|
| 70 |
+
setuptools==80.10.2
|
| 71 |
+
shellingham==1.5.4
|
| 72 |
+
six==1.17.0
|
| 73 |
+
starlette==0.50.0
|
| 74 |
+
sympy==1.14.0
|
| 75 |
+
tokenizers==0.22.2
|
| 76 |
+
tomlkit==0.13.3
|
| 77 |
+
torch==2.10.0
|
| 78 |
+
tqdm==4.67.3
|
| 79 |
+
transformers==5.0.0
|
| 80 |
+
triton==3.6.0
|
| 81 |
+
typer==0.21.1
|
| 82 |
+
typer-slim==0.21.1
|
| 83 |
+
typing-extensions==4.15.0
|
| 84 |
+
typing-inspection==0.4.2
|
| 85 |
+
uvicorn==0.40.0
|
retrieval_utils.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
from typing import List, Tuple
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
# Constants
|
| 6 |
+
DB_PATH = "anime.db"
|
| 7 |
+
|
| 8 |
+
def get_recommendations(requested_genres: List[str], limit: int = 5) -> str:
    """Return a JSON string of the top anime matching the requested genres.

    Results are ranked first by how many of the requested genres each anime
    matches, then by the anime's score, both descending.

    Args:
        requested_genres: Genre names to match against the Genre table.
        limit: Maximum number of recommendations to return.

    Returns:
        A JSON array string of {name, score, description} objects. An empty
        genre list yields an empty array (SQLite accepts an empty ``IN ()``).
    """
    connection = sqlite3.connect(DB_PATH)
    try:
        cursor = connection.cursor()

        # One '?' placeholder per requested genre keeps the query fully
        # parameterized — no user input is interpolated into the SQL text.
        placeholders = ', '.join(['?'] * len(requested_genres))

        query = f"""
        -- Project only the anime's name, score, and synopsis
        SELECT
            A.name,
            A.score,
            A.synopsis

        FROM Anime A

        -- Join through the AnimeGenre link table to each anime's genres
        JOIN AnimeGenre AG ON A.id = AG.anime_id
        JOIN Genre G ON AG.genre_id = G.id

        -- Keep only rows whose genre is one of the requested genres
        WHERE G.genre_name IN ({placeholders})

        -- One output row per anime
        GROUP BY A.id

        -- Primary sort: number of requested-genre matches; secondary: score
        ORDER BY COUNT(G.id) DESC, A.score DESC

        LIMIT ?
        """

        cursor.execute(query, requested_genres + [limit])
        results = cursor.fetchall()
    finally:
        # BUG FIX: close the connection even when the query raises, so the
        # database handle can never leak.
        connection.close()

    # Compose a JSON string from the result rows and return it.
    return jsonify_recommendations(results)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def jsonify_recommendations(recommendations: List[Tuple[str, float, str]]) -> str:
    """Serialize (name, score, synopsis) rows into a pretty-printed JSON array."""
    # Unpack each row into a dict so json.dumps can render it directly.
    payload = [
        {'name': name, 'score': score, 'description': synopsis}
        for name, score, synopsis in recommendations
    ]
    return json.dumps(payload, indent=4)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Driver Code
if __name__ == '__main__':
    # Ad-hoc smoke check against the real anime.db in the working directory;
    # prints the raw JSON payload for manual inspection.
    requested_genres = ["Action", "Drama"]
    recommendations = get_recommendations(requested_genres)
    print(recommendations)
|
static/css/gradiomain.css
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Main Gradio app stylesheet. Colors come from the CSS custom properties
   declared in theme.css; image URLs use Gradio's static-file route. */

/* ====== MAIN APP CONTAINER AND HEADER ====== */
.page-header {
    text-align: center;
    border-radius: 22px;
    margin-bottom: 1rem;
    margin-top: 0.25rem;
}

.page-header h1 {
    margin-bottom: 1.2rem;
    margin-top: 2rem;
}

.gradio-container {
    background-image:
        linear-gradient(rgba(12, 13, 35, 0.6), rgba(12, 13, 35, 0.8)),
        url('/gradio_api/file=static/images/background.png');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    background-attachment: fixed;
    background-blend-mode: hard-light;

    color: var(--text-main);
    font-family: 'Inter', sans-serif;
    min-height: 100vh;
    padding: 20px;
}

/* ====== CHATBOT CONTAINER ====== */
.custom-chatbot,
[role="log"] {
    background: var(--bg-glass-strong) !important;
    border-radius: 22px !important;
}

/* Gradio wraps components in generic .block/.form/.gr-group containers;
   restyle them all so every panel matches the glass theme. */
.block,
.form,
.gr-group {
    background: var(--bg-glass-strong);
    border-radius: 22px !important;
}

/* ====== CHAT MESSAGES ====== */
.bot,
.user {
    padding: 16px 20px;
    border-radius: 18px;
    margin: 12px 0;

    max-width: 70%;
    min-width: fit-content;
    display: inline-flex;

    border: 1px solid var(--border-soft);
    box-shadow: 0 4px 20px rgba(122, 162, 255, 0.2);

    white-space: normal;
    overflow-wrap: break-word;
    word-break: normal;
    box-sizing: border-box;
}

.bot {
    background: var(--bot-message);
    color: var(--text-main);
    margin-right: auto;
    margin-left: 0;
}

.user {
    background: var(--user-message);
    color: white;
    margin-left: auto;
    margin-right: 0;
}

/* ====== CHAT INPUT AREA ====== */
.gr-text-input,
[data-testid="textbox"] {
    background: rgba(40, 24, 65, 0.6) !important;
    border-radius: 18px !important;
    border: 1px solid var(--border-soft) !important;
    min-height: 60px !important;
    height: auto !important;
    margin-right: 12px !important;
    box-shadow: 0 0 10px rgba(255, 124, 229, 0.1);
}

.gr-text-input:hover,
textarea:hover {
    box-shadow: 0 0 15px var(--accent-glow) !important;
    transition: box-shadow 0.3s ease !important;
}

.gr-text-input:focus,
textarea:focus {
    box-shadow: 0 0 20px var(--accent-glow) !important;
    outline: none !important;
    transition: box-shadow 0.3s ease !important;
}

textarea {
    background: transparent !important;
    color: var(--text-main) !important;
    border: none !important;
    border-radius: 16px !important;
    padding: 16px 20px !important;
    font-size: 16px !important;
    line-height: 1.5 !important;
    min-height: 60px !important;
    height: auto !important;
    resize: vertical !important;
    overflow-y: auto !important;
    max-height: 200px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}

textarea::placeholder {
    color: var(--text-muted) !important;
    opacity: 0.75 !important;
}

/* ====== BUTTON STUFF ====== */
button[data-testid="login-button"],
.submit-button {
    background: var(--btn-bg);
    border: none !important;
    border-radius: 16px !important;
    color: white !important;
    font-weight: 600 !important;
    padding: 16px 28px !important;
    min-height: 60px !important;
    box-shadow: none;
    cursor: pointer !important;
}

.submit-button:hover,
.submit-button:focus-visible {
    box-shadow: 0 0 18px var(--accent-glow);
    transform: translateY(-1px);
    filter: brightness(1.05);
}

/* Sidebar buttons */
.sidebar button {
    background: linear-gradient(135deg, #ff7ce5, #7ac5ff) !important;
}

.sidebar button:hover {
    background: linear-gradient(135deg, #ff9de0, #7ac5ff) !important;
}

/* Toggle Buttons */
.block.toggle-button {
    margin-left: auto !important;
    margin-right: auto !important;
    width: fit-content;
}

/* Hide the native checkbox; the styled label below acts as the control. */
.toggle-button input[type="checkbox"] {
    display: none;
}

.toggle-button label {
    display: inline-flex;
    align-items: center;
    gap: 0.4rem;

    padding: 0.65rem 1.2rem;
    border-radius: 999px;
    cursor: pointer;

    font-weight: 600;
    letter-spacing: 0.02em;

    background: var(--btn-bg);
    color: var(--btn-text);
    border: 1px solid var(--btn-border);

    transition:
        background 0.25s ease,
        box-shadow 0.25s ease,
        transform 0.25s ease;
}

.toggle-button label:hover {
    background: var(--btn-bg-hover);
    box-shadow: 0 0 22px rgba(122, 197, 255, 0.7);
}

.toggle-button label:has(input:checked) {
    background: var(--btn-bg-active);
    box-shadow: 0 0 28px rgba(122, 197, 255, 0.9);
}

/* Add the word ON in green text if enabled local model */
.toggle-button label:has(input:checked)::after {
    content: " ON";
    font-weight: 700;
    color: #4cffc3;
    margin-left: 0.3rem;
    text-shadow: 0 0 6px rgba(76, 255, 195, 0.8);
}

.toggle-button label:active {
    transform: scale(0.96);
}

/* ====== SLIDERS ====== */
/* NOTE(review): ::-webkit-* pseudo-elements style WebKit/Blink browsers
   only; Firefox falls back to its native slider appearance. */
.custom-slider input[type="range"]::-webkit-slider-thumb {
    background: var(--accent-secondary) !important;
    width: 22px !important;
    height: 22px !important;
    border-radius: 50% !important;
    border: 2px solid white !important;
    box-shadow: 0 0 12px var(--accent-glow);
}

.custom-slider input[type="range"]::-webkit-slider-runnable-track {
    height: 8px;
    border-radius: 8px;
    background: linear-gradient(120deg, var(--accent-primary), var(--accent-secondary));
}

/* ====== SIDEBAR ====== */
.sidebar {
    background-image:
        linear-gradient(rgba(20, 24, 55, 0.4), rgba(20, 24, 55, 0.4)),
        url('/gradio_api/file=static/images/sidebar.jpg');
    background-size: auto, cover;
    background-position: top left, center;
    background-repeat: no-repeat, no-repeat;

    border-right: 1px solid var(--border-soft) !important;
    backdrop-filter: blur(15px);
    padding: 20px !important;
}
|
static/css/theme.css
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* This page contains base theme colors for the app.
   All values are CSS custom properties consumed by gradiomain.css via var(). */
:root {
    /* Base backgrounds */
    --bg-main: #0c0d23;
    --bg-glass: rgba(28, 20, 55, 0.55);
    --bg-glass-strong: rgba(28, 20, 55, 0.85);
    --bg-gradient: linear-gradient(135deg, #1b1b40, #3a1f5f);

    /* Accents */
    --accent-primary: #ff7ce5;
    --accent-secondary: #7ac5ff;
    --accent-glow: rgba(255, 124, 229, 0.5);

    /* Borders and shadows */
    --border-soft: rgba(255, 255, 255, 0.08);
    --shadow-soft: 0 15px 35px rgba(255, 124, 229, 0.3);

    /* Text */
    --text-main: #f0f0ff;
    --text-muted: #b9b6ff;

    /* Chat messages */
    --user-message: linear-gradient(135deg, #ff9de0, #7ac5ff);
    --bot-message: linear-gradient(135deg, #ff9de0, #7ac5ff);

    /* Buttons */
    --btn-bg: linear-gradient(135deg, #7ac5ff, #9aa4ff);
    --btn-bg-hover: linear-gradient(135deg, #8fd0ff, #aab3ff);
    --btn-bg-active: linear-gradient(135deg, #6bb6f5, #8f98ff);

    --btn-text: #0c0d23;
    --btn-border: rgba(122, 197, 255, 0.45);

    --btn-glow: 0 0 18px rgba(122, 197, 255, 0.55);
}
|
static/images/background.png
ADDED
|
Git LFS Details
|
static/images/sidebar.jpg
ADDED
|
Git LFS Details
|
test_detect_genre.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from backend import detect_genres
|
| 2 |
+
|
| 3 |
+
def test_one_genre():
    # BUG FIX: the original built a tuple `detect_genres(...), [...]` and
    # discarded it — the test could never fail. Assert the comparison.
    assert detect_genres("I like Action") == ["Action"]
|
| 5 |
+
|
| 6 |
+
def test_multiple_genres():
    # BUG FIX: the original built a tuple and discarded it — no assertion
    # was ever made. Assert the comparison so the test can actually fail.
    assert detect_genres("I like Action and Mystery") == ["Action", "Mystery"]
|
| 8 |
+
|
| 9 |
+
def test_no_genres():
    # BUG FIX: the original built a tuple and discarded it — no assertion
    # was ever made. Assert that no genre is detected in generic text.
    assert detect_genres("I like Interesting shows") == []
|
| 11 |
+
|
tests/test_chat_models.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from backend import process_user_query
|
| 6 |
+
|
| 7 |
+
TEST_SYSTEM_MESSAGE = "You are a friendly chatbot."
|
| 8 |
+
TEST_USER_MESSAGE = "Hello!"
|
| 9 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 10 |
+
|
| 11 |
+
def test_HF_token_exists():
|
| 12 |
+
token = os.getenv("HF_TOKEN")
|
| 13 |
+
assert token is not None
|
| 14 |
+
assert len(token) > 1
|
| 15 |
+
|
| 16 |
+
def test_local_model_runs():
    """The local-model path of process_user_query must stream a non-empty reply."""
    stream = process_user_query(system_message=TEST_SYSTEM_MESSAGE,
                                history=[],
                                user_message=TEST_USER_MESSAGE,
                                use_local_model=True,
                                max_tokens=100,
                                temperature=0.7,
                                top_p=0.7,
                                hf_token=HF_TOKEN)
    # Keep only the last streamed value — the fully accumulated response.
    final_reply = ""
    for partial in stream:
        final_reply = partial

    assert len(final_reply) > 0
|
| 30 |
+
|
| 31 |
+
def test_external_model_runs():
    """The hosted-inference path of process_user_query must stream a non-empty reply."""
    stream = process_user_query(system_message=TEST_SYSTEM_MESSAGE,
                                history=[],
                                user_message=TEST_USER_MESSAGE,
                                use_local_model=False,
                                max_tokens=100,
                                temperature=0.7,
                                top_p=0.7,
                                hf_token=HF_TOKEN)
    # Keep only the last streamed value — the fully accumulated response.
    final_reply = ""
    for partial in stream:
        final_reply = partial
    assert len(final_reply) > 0
|
| 44 |
+
|
| 45 |
+
# Allow running this test module directly (outside the pytest CLI).
if __name__ == "__main__":
    pytest.main()
|
| 47 |
+
|
| 48 |
+
|
tests/test_retrieval_utils.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import sqlite3
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import pytest
|
| 5 |
+
import retrieval_utils
|
| 6 |
+
from retrieval_utils import get_recommendations
|
| 7 |
+
|
| 8 |
+
# Setup Test DB
|
| 9 |
+
def _setup_test_db(db_path: str | Path):
    """Create and seed a throwaway SQLite database compatible with retrieval_utils.

    Builds the three tables the queries under test join against (Anime, Genre,
    AnimeGenre) and inserts a small, fully known fixture.

    Args:
        db_path: Filesystem location for the database. Accepts a plain string
            or a pathlib.Path — callers in this module pass both, so the old
            ``str``-only annotation was wrong.
    """
    connection = sqlite3.connect(db_path)
    try:
        cursor = connection.cursor()

        # Schema mirrors create_db.py closely enough for the ranking queries.
        cursor.executescript(
            """
            CREATE TABLE Anime (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                score REAL NOT NULL,
                synopsis TEXT
            );

            CREATE TABLE Genre (
                id INTEGER PRIMARY KEY,
                genre_name TEXT NOT NULL
            );

            CREATE TABLE AnimeGenre (
                anime_id INTEGER NOT NULL,
                genre_id INTEGER NOT NULL
            );
            """
        )

        # Fixture rows for the Anime table.
        anime_rows = [
            (1, "Alpha", 9.1, "Alpha synopsis"),
            (2, "Beta", 8.7, "Beta synopsis"),
            (3, "Gamma", 8.9, "Gamma synopsis"),
            (4, "Delta", 7.5, "Delta synopsis"),
        ]

        # Fixture rows for the Genre table.
        genre_rows = [
            (1, "Action"),
            (2, "Drama"),
            (3, "Comedy"),
        ]

        # Fixture rows for the AnimeGenre link table.
        anime_genre_rows = [
            (1, 1), (1, 2),  # Alpha: Action, Drama (2 matches)
            (2, 1),          # Beta: Action (1 match)
            (3, 2),          # Gamma: Drama (1 match)
            (4, 3),          # Delta: Comedy (0 matches for Action/Drama)
        ]

        cursor.executemany("INSERT INTO Anime VALUES (?, ?, ?, ?);", anime_rows)
        cursor.executemany("INSERT INTO Genre VALUES (?, ?);", genre_rows)
        cursor.executemany("INSERT INTO AnimeGenre VALUES (?, ?);", anime_genre_rows)

        # Persist all writes to the database file.
        connection.commit()
    finally:
        # BUG FIX: close even when a statement raises, so tmp_path files are
        # never left open/locked by a failed setup.
        connection.close()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def test_get_recommendations_orders_by_match_count_then_score(tmp_path: Path,
                                                              monkeypatch: pytest.MonkeyPatch):
    """A two-genre query ranks by match count first, then by score."""
    # Point retrieval_utils at a freshly seeded temporary database.
    db_path = tmp_path / "test.db"
    _setup_test_db(str(db_path))
    monkeypatch.setattr(retrieval_utils, "DB_PATH", str(db_path))

    # Exercise the method under test and decode its JSON payload.
    result = json.loads(get_recommendations(["Action", "Drama"], limit=3))

    # Alpha (2 matches) first, then Gamma over Beta on score.
    assert [item["name"] for item in result] == ["Alpha", "Gamma", "Beta"]
    assert result[0]["score"] == 9.1
    assert "description" in result[0]
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def test_get_recommendations_respects_limit(tmp_path: Path,
                                            monkeypatch: pytest.MonkeyPatch):
    """limit=1 must return only the single best match."""
    # Point retrieval_utils at a freshly seeded temporary database.
    db_path = tmp_path / "test.db"
    _setup_test_db(str(db_path))
    monkeypatch.setattr(retrieval_utils, "DB_PATH", str(db_path))

    # Exercise the method under test and decode its JSON payload.
    result = json.loads(get_recommendations(["Action", "Drama"], limit=1))

    assert len(result) == 1
    assert result[0]["name"] == "Alpha"
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def test_get_recommendations_single_genre(tmp_path: Path,
                                          monkeypatch: pytest.MonkeyPatch):
    """A single-genre query returns only matching anime, best score first."""
    # Point retrieval_utils at a freshly seeded temporary database.
    db_path = tmp_path / "test.db"
    _setup_test_db(db_path)
    monkeypatch.setattr(retrieval_utils, "DB_PATH", str(db_path))

    # Exercise the method under test and decode its JSON payload.
    result = json.loads(get_recommendations(["Drama"], limit=5))

    assert [item["name"] for item in result] == ["Alpha", "Gamma"]
    assert all("description" in item for item in result)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def test_get_recommendations_no_genre(tmp_path: Path,
                                      monkeypatch: pytest.MonkeyPatch):
    """An empty genre list yields an empty recommendation set."""
    # Point retrieval_utils at a freshly seeded temporary database.
    db_path = tmp_path / "test.db"
    _setup_test_db(db_path)
    monkeypatch.setattr(retrieval_utils, "DB_PATH", str(db_path))

    # Exercise the method under test and decode its JSON payload.
    result = json.loads(get_recommendations([], limit=5))

    assert len(result) == 0
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|