Penguindrum920 commited on
Commit
a1614c5
·
verified ·
1 Parent(s): f7f4c71

Upload 72 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. backend/.dockerignore +21 -0
  3. backend/.env.example +29 -0
  4. backend/.python-version +1 -0
  5. backend/Dockerfile +28 -0
  6. backend/README.md +8 -0
  7. backend/__pycache__/config.cpython-313.pyc +0 -0
  8. backend/__pycache__/main.cpython-313.pyc +0 -0
  9. backend/__pycache__/security.cpython-313.pyc +0 -0
  10. backend/aniverse.db +0 -0
  11. backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/data_level0.bin +3 -0
  12. backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/header.bin +3 -0
  13. backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/index_metadata.pickle +3 -0
  14. backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/length.bin +3 -0
  15. backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/link_lists.bin +3 -0
  16. backend/chroma_db/chroma.sqlite3 +3 -0
  17. backend/config.py +33 -0
  18. backend/data/__init__.py +1 -0
  19. backend/data/__pycache__/__init__.cpython-313.pyc +0 -0
  20. backend/data/__pycache__/anime_schema.cpython-313.pyc +0 -0
  21. backend/data/__pycache__/data_loader.cpython-313.pyc +0 -0
  22. backend/data/__pycache__/database.cpython-313.pyc +0 -0
  23. backend/data/__pycache__/manga_loader.cpython-313.pyc +0 -0
  24. backend/data/__pycache__/manga_schema.cpython-313.pyc +0 -0
  25. backend/data/anime_schema.py +68 -0
  26. backend/data/data_loader.py +170 -0
  27. backend/data/database.py +95 -0
  28. backend/data/manga_loader.py +129 -0
  29. backend/data/manga_schema.py +52 -0
  30. backend/embeddings/__init__.py +1 -0
  31. backend/embeddings/__pycache__/__init__.cpython-313.pyc +0 -0
  32. backend/embeddings/__pycache__/chroma_store.cpython-313.pyc +0 -0
  33. backend/embeddings/__pycache__/manga_chroma_store.cpython-313.pyc +0 -0
  34. backend/embeddings/__pycache__/search_utils.cpython-313.pyc +0 -0
  35. backend/embeddings/build_embeddings.py +66 -0
  36. backend/embeddings/build_manga_embeddings.py +61 -0
  37. backend/embeddings/chroma_store.py +162 -0
  38. backend/embeddings/manga_chroma_store.py +156 -0
  39. backend/embeddings/search_utils.py +126 -0
  40. backend/llm/__init__.py +1 -0
  41. backend/llm/__pycache__/__init__.cpython-313.pyc +0 -0
  42. backend/llm/__pycache__/groq_client.cpython-313.pyc +0 -0
  43. backend/llm/groq_client.py +162 -0
  44. backend/main.py +88 -0
  45. backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/data_level0.bin +3 -0
  46. backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/header.bin +3 -0
  47. backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/index_metadata.pickle +3 -0
  48. backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/length.bin +3 -0
  49. backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/link_lists.bin +3 -0
  50. backend/manga_chroma_db/chroma.sqlite3 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backend/chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
37
+ backend/manga_chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
backend/.dockerignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ .env
7
+ .venv/
8
+ venv/
9
+ ENV/
10
+ *.egg-info/
11
+ .eggs/
12
+ dist/
13
+ build/
14
+ *.egg
15
+ .git
16
+ .gitignore
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ *.log
20
+ .DS_Store
21
+ Thumbs.db
backend/.env.example ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment Configuration Template
2
+ # Copy this to .env and fill in your values
3
+
4
+ # Required: Groq API Key for AI chat
5
+ GROQ_API_KEY=your_groq_api_key_here
6
+
7
+ # Optional: MyAnimeList API (for MAL import feature)
8
+ MAL_CLIENT_ID=your_mal_client_id
9
+ MAL_CLIENT_SECRET=your_mal_client_secret
10
+
11
+ # Server Configuration (defaults work for development)
12
+ HOST=0.0.0.0
13
+ PORT=8000
14
+
15
+ # LLM Model (default: llama-3.1-8b-instant)
16
+ LLM_MODEL=llama-3.1-8b-instant
17
+
18
+ # Database Path (default: ./aniverse.db)
19
+ DATABASE_PATH=./aniverse.db
20
+
21
+ # ChromaDB Paths (defaults work for Docker)
22
+ CHROMA_DB_PATH=./data/chroma_db
23
+ MANGA_CHROMA_DB_PATH=./data/manga_chroma_db
24
+
25
+ # CORS Origins (comma-separated for production)
26
+ CORS_ORIGINS=http://localhost:5500,http://localhost:3000
27
+
28
+ # Production Mode (set to 'true' for production)
29
+ PRODUCTION=false
backend/.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.0
backend/Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AniVerse Backend - Hugging Face Spaces Dockerfile
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ gcc \
10
+ g++ \
11
+ curl \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Copy requirements first for caching
15
+ COPY requirements.txt .
16
+
17
+ # Install Python dependencies
18
+ RUN pip install --no-cache-dir -r requirements.txt
19
+
20
+ # Copy application code
21
+ COPY . .
22
+
23
+ # Set environment variables
24
+ ENV PYTHONUNBUFFERED=1
25
+ ENV PORT=7860
26
+
27
+ # Download data on startup and run server
28
+ CMD python setup_data.py && uvicorn main:app --host 0.0.0.0 --port 7860
backend/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AniVerse API
3
+ emoji: 🎌
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
backend/__pycache__/config.cpython-313.pyc ADDED
Binary file (939 Bytes). View file
 
backend/__pycache__/main.cpython-313.pyc ADDED
Binary file (3.14 kB). View file
 
backend/__pycache__/security.cpython-313.pyc ADDED
Binary file (5.01 kB). View file
 
backend/aniverse.db ADDED
Binary file (36.9 kB). View file
 
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0ec6d7f7c3e18b3416b0c55da30d6357a455820b85b3935dd1e630a719e19e
3
+ size 34616104
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3dd664db442013f61804af49e0c50db29dd271dfaec5d2d737b62219d2e8ada
3
+ size 100
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8488b075b81b5e4a24c0d5a07c346f431b467db1d4bc40faf5b7b38bd8955a07
3
+ size 605982
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c37ac8716cd37bc387370b60d0552618ac361de76f5c44e32aa5aa17cb2b5dd
3
+ size 82616
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3973cb0499e959fdc45c3c1900beca3c75e0ed7e67fa93818a84c64a250268
3
+ size 180876
backend/chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc52dc177e5a53724f27d6656234d10c623ddc1f014125b2738e0bceb777cc67
3
+ size 134754304
backend/config.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""AniVerse Configuration"""
import os
from pathlib import Path
from dotenv import load_dotenv

# Pull variables from a local .env file into the environment (no-op if absent).
load_dotenv()

# Paths - support both local development and container deployment
BASE_DIR = Path(__file__).parent.parent   # repository root (one level above backend/)
BACKEND_DIR = Path(__file__).parent       # the backend/ directory itself

# Check if running in container (dataset will be in /app/dataset)
if (BACKEND_DIR / "dataset").exists():
    DATASET_PATH = BACKEND_DIR / "dataset" / "anime.csv"
    MANGA_DATASET_PATH = BACKEND_DIR / "dataset" / "manga_data" / "MAL-manga.csv"
else:
    # NOTE(review): the local branch uses "manga data" (with a space) while
    # the container branch uses "manga_data" -- confirm both directory names
    # actually exist in their respective layouts.
    DATASET_PATH = BASE_DIR / "dataset" / "anime.csv"
    MANGA_DATASET_PATH = BASE_DIR / "dataset" / "manga data" / "MAL-manga.csv"

# ChromaDB paths - use environment variables with fallbacks
CHROMA_DB_PATH = Path(os.getenv("CHROMA_DB_PATH", str(BACKEND_DIR / "chroma_db")))
MANGA_CHROMA_DB_PATH = Path(os.getenv("MANGA_CHROMA_DB_PATH", str(BACKEND_DIR / "manga_chroma_db")))

# API Keys
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")  # empty string when not configured

# Model Settings
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
# NOTE(review): .env.example documents an LLM_MODEL environment variable, but
# this constant is hard-coded and ignores the environment -- confirm intended.
LLM_MODEL = "llama-3.1-8b-instant"  # Fast, free on Groq

# API Settings
JIKAN_BASE_URL = "https://api.jikan.moe/v4"
JIKAN_RATE_LIMIT = 3  # requests per second
backend/data/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Data module
backend/data/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (130 Bytes). View file
 
backend/data/__pycache__/anime_schema.cpython-313.pyc ADDED
Binary file (3.96 kB). View file
 
backend/data/__pycache__/data_loader.cpython-313.pyc ADDED
Binary file (10.4 kB). View file
 
backend/data/__pycache__/database.cpython-313.pyc ADDED
Binary file (4.38 kB). View file
 
backend/data/__pycache__/manga_loader.cpython-313.pyc ADDED
Binary file (7.02 kB). View file
 
backend/data/__pycache__/manga_schema.cpython-313.pyc ADDED
Binary file (2.65 kB). View file
 
backend/data/anime_schema.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Anime Data Models"""
2
+ from pydantic import BaseModel, Field
3
+ from typing import Optional
4
+ import ast
5
+
6
+
7
class Anime(BaseModel):
    """Core anime data model.

    Mirrors the MyAnimeList-style columns produced by the CSV loader;
    everything except `mal_id` and `title` is optional or defaulted because
    the source data is sparse.
    """
    mal_id: int = Field(..., description="MyAnimeList ID")
    title: str
    title_english: Optional[str] = None
    title_japanese: Optional[str] = None
    media_type: str = "tv"       # default when the dataset omits the type
    episodes: Optional[int] = None
    status: str = "unknown"
    score: Optional[float] = None    # community average score
    scored_by: Optional[int] = None  # number of users who scored
    rank: Optional[int] = None
    popularity: Optional[int] = None
    favorites: Optional[int] = None
    synopsis: Optional[str] = None
    # Mutable defaults are safe here: pydantic copies them per instance.
    genres: list[str] = []
    studios: list[str] = []
    source: Optional[str] = None   # original medium string from the dataset
    rating: Optional[str] = None   # age-rating string from the dataset
    image_url: Optional[str] = None
    start_date: Optional[str] = None  # kept as raw date strings from the CSV
    end_date: Optional[str] = None
29
+
30
+
31
class AnimeSearchResult(BaseModel):
    """A single search hit: the anime plus its similarity to the query."""
    anime: Anime
    similarity: float = Field(..., ge=0, le=1)  # validated to lie in [0, 1]
35
+
36
+
37
class ChatMessage(BaseModel):
    """One turn of the AI-recommendation chat history."""
    # Only these two roles are accepted; anything else fails validation.
    role: str = Field(..., pattern="^(user|assistant)$")
    content: str
41
+
42
+
43
class RecommendationRequest(BaseModel):
    """Request payload for AI recommendations."""
    query: str                          # free-text description of what the user wants
    history: list[ChatMessage] = []     # prior conversation turns
    limit: int = Field(default=10, ge=1, le=50)  # max number of recommendations
48
+
49
+
50
class ReviewSummary(BaseModel):
    """Summarized review data for one title."""
    overall_sentiment: str  # positive, negative, mixed
    pros: list[str]
    cons: list[str]
    summary: str
    aspect_scores: dict[str, float] = {}  # story, animation, characters, etc.
57
+
58
+
59
def parse_list_field(value: str) -> list[str]:
    """Parse a stringified list from a CSV cell into a list of strings.

    Handles three input shapes:
    - Python-literal lists such as "['Action', 'Adventure']"
    - plain comma-separated text such as "Action, Adventure"
    - missing values: None, "", "[]", or a float NaN from pandas

    Always returns a list of strings. (The original implementation returned
    whatever ``ast.literal_eval`` produced, so a scalar cell like "123" leaked
    an ``int`` instead of a ``list[str]``.)
    """
    # pandas represents missing CSV cells as float NaN; NaN is truthy, so the
    # float check is required in addition to the falsiness check.
    if not value or value == "[]" or isinstance(value, float):
        return []
    try:
        # Handle Python list string format: "['Action', 'Adventure']"
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        # Handle comma-separated format
        return [g.strip() for g in str(value).split(",") if g.strip()]
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]
    # A scalar literal (e.g. "123"): wrap it so the return type holds.
    return [str(parsed)]
backend/data/data_loader.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load and process anime dataset"""
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from typing import Generator
5
+ import sys
6
+ sys.path.insert(0, str(Path(__file__).parent.parent))
7
+
8
+ from config import DATASET_PATH
9
+ from data.anime_schema import Anime, parse_list_field
10
+
11
+
12
def load_anime_dataset(limit: int | None = None) -> pd.DataFrame:
    """Load the anime dataset from CSV into a DataFrame.

    Args:
        limit: If given, read only the first `limit` rows (passed through to
            pandas as `nrows`) -- useful for quick local testing.

    Returns:
        DataFrame with raw CSV columns renamed to match the `Anime` schema
        (e.g. `id` -> `mal_id`, `mean` -> `score`).
    """
    print(f"Loading dataset from {DATASET_PATH}...")

    df = pd.read_csv(DATASET_PATH, nrows=limit)

    # Rename columns to match our schema
    column_mapping = {
        "id": "mal_id",
        "mean": "score",
        "num_scoring_users": "scored_by",
        "num_favorites": "favorites",
        "main_picture_medium": "image_url",
        "alternative_titles_en": "title_english",
        "alternative_titles_ja": "title_japanese",
    }
    df = df.rename(columns=column_mapping)

    print(f"Loaded {len(df)} anime entries")
    return df
32
+
33
+
34
def parse_anime_row(row: pd.Series) -> Anime:
    """Convert a (renamed) DataFrame row into an `Anime` model.

    Every optional column goes through a `pd.notna` guard so NaN cells become
    `None` instead of the string "nan" or a float. May raise on malformed
    required fields (e.g. a non-numeric `mal_id`); callers such as
    `iter_anime` catch and skip such rows.
    """
    return Anime(
        mal_id=int(row["mal_id"]),
        title=str(row.get("title", "Unknown")),
        title_english=row.get("title_english") if pd.notna(row.get("title_english")) else None,
        title_japanese=row.get("title_japanese") if pd.notna(row.get("title_japanese")) else None,
        media_type=str(row.get("media_type", "unknown")),
        # An episode count of 0 in the CSV is treated as unknown -> None.
        episodes=int(row["num_episodes"]) if pd.notna(row.get("num_episodes")) and row.get("num_episodes") != 0 else None,
        status=str(row.get("status", "unknown")),
        score=float(row["score"]) if pd.notna(row.get("score")) else None,
        scored_by=int(row["scored_by"]) if pd.notna(row.get("scored_by")) else None,
        rank=int(row["rank"]) if pd.notna(row.get("rank")) else None,
        popularity=int(row["popularity"]) if pd.notna(row.get("popularity")) else None,
        favorites=int(row["favorites"]) if pd.notna(row.get("favorites")) else None,
        synopsis=str(row.get("synopsis", "")) if pd.notna(row.get("synopsis")) else None,
        # genres/studios arrive as stringified lists; parse_list_field decodes them.
        genres=parse_list_field(row.get("genres", "[]")),
        studios=parse_list_field(row.get("studios", "[]")),
        source=str(row.get("source")) if pd.notna(row.get("source")) else None,
        rating=str(row.get("rating")) if pd.notna(row.get("rating")) else None,
        image_url=str(row.get("image_url")) if pd.notna(row.get("image_url")) else None,
        start_date=str(row.get("start_date")) if pd.notna(row.get("start_date")) else None,
        end_date=str(row.get("end_date")) if pd.notna(row.get("end_date")) else None,
    )
58
+
59
+
60
def iter_anime(df: pd.DataFrame) -> Generator[Anime, None, None]:
    """Yield each DataFrame row as a validated ``Anime`` model.

    Rows that fail parsing/validation are reported to stdout and skipped, so
    a single malformed entry never aborts a full dataset pass.
    """
    for _, record in df.iterrows():
        try:
            parsed = parse_anime_row(record)
        except Exception as err:  # deliberately broad: skip any bad row
            print(f"Error parsing row {record.get('mal_id', 'unknown')}: {err}")
            continue
        yield parsed
68
+
69
+
70
def create_embedding_text(anime: Anime) -> str:
    """Build the pipe-separated text that gets embedded for this anime.

    Combines title, English title (when distinct), genres, a truncated
    synopsis, and detected scene/trope keywords into a single string.
    """
    parts = [anime.title]

    if anime.title_english and anime.title_english != anime.title:
        parts.append(anime.title_english)

    if anime.genres:
        parts.append(f"Genres: {', '.join(anime.genres)}")

    # Truncate synopsis to prevent overly long embeddings. Default to "" so
    # the keyword extraction below never sees an unbound name -- the original
    # code raised NameError for any anime without a synopsis, because
    # `synopsis` was only assigned inside the `if anime.synopsis:` branch.
    synopsis = anime.synopsis[:1000] if anime.synopsis else ""
    if synopsis:
        parts.append(synopsis)

    # Extract scene keywords for better scene-based search
    # (extract_scene_keywords returns [] for an empty synopsis).
    scene_keywords = extract_scene_keywords(synopsis, anime.genres or [])
    if scene_keywords:
        parts.append(f"Scenes and tropes: {', '.join(scene_keywords)}")

    return " | ".join(parts)
91
+
92
+
93
# Scene/trope detection patterns: maps a canonical scene/trope label to the
# substrings that signal it inside a lowercased synopsis.
SCENE_PATTERNS = {
    # Romantic scenes
    "confession": ["confess", "confession", "i love you", "feelings for", "admit feelings"],
    "rooftop scene": ["rooftop", "on the roof", "school rooftop"],
    "beach episode": ["beach", "swimsuit", "ocean", "summer vacation"],
    "festival date": ["festival", "fireworks", "yukata", "summer festival"],
    "accidental kiss": ["accidental", "lips touched", "fell on"],

    # Action scenes
    "training arc": ["training", "train harder", "become stronger", "special training"],
    "tournament arc": ["tournament", "competition", "championship", "finals"],
    "final battle": ["final battle", "last fight", "ultimate showdown", "final boss"],
    "power awakening": ["awakens", "hidden power", "true power", "unleash"],
    "sacrifice": ["sacrifice", "gave their life", "protect everyone", "died saving"],

    # Emotional scenes
    "tearful goodbye": ["goodbye", "farewell", "parting", "separation"],
    "death scene": ["death", "died", "killed", "passed away", "funeral"],
    "reunion": ["reunite", "reunion", "meet again", "found each other"],
    "flashback": ["flashback", "memories", "past", "childhood"],
    "redemption arc": ["redemption", "atone", "make amends", "change their ways"],

    # Character tropes
    "overpowered protagonist": ["overpowered", "strongest", "unbeatable", "one punch", "no match"],
    "hidden identity": ["secret identity", "hiding", "disguise", "true self"],
    "underdog story": ["underdog", "weakest", "looked down upon", "prove them wrong"],
    "transfer student": ["transfer student", "new student", "just arrived"],
    "chosen one": ["chosen", "prophecy", "destined", "fate"],

    # Setting/atmosphere
    "post-apocalyptic": ["apocalypse", "post-apocalyptic", "destroyed world", "ruins"],
    "isekai": ["another world", "transported", "reincarnated", "summoned to"],
    "time loop": ["time loop", "repeating", "stuck in time", "groundhog"],
    "school setting": ["high school", "academy", "school", "classroom"],
    "dystopian": ["dystopia", "oppressive", "government control", "rebellion"],
}


def extract_scene_keywords(synopsis: str, genres: list[str]) -> list[str]:
    """Extract scene/trope keywords from a synopsis for scene-based search.

    Returns at most 10 unique labels: substring-matched entries from
    SCENE_PATTERNS first, then tropes implied by the title's genres. The
    order is deterministic (first-detection order), so rebuilt embedding
    text is reproducible -- the original `list(set(...))[:10]` made both
    the order and which labels survived the cap vary run to run.
    """
    if not synopsis:
        return []

    synopsis_lower = synopsis.lower()
    detected = []

    # One label per scene, however many of its patterns match.
    for scene_name, patterns in SCENE_PATTERNS.items():
        if any(pattern in synopsis_lower for pattern in patterns):
            detected.append(scene_name)

    # Add genre-based common tropes
    genre_tropes = {
        "Romance": ["love triangle", "slow burn romance"],
        "Action": ["battle scenes", "fight choreography"],
        "Comedy": ["comedic moments", "slapstick"],
        "Drama": ["emotional moments", "character development"],
        "Horror": ["scary scenes", "tension building"],
        "Sports": ["match scenes", "team dynamics"],
        "Music": ["performance scenes", "concert"],
    }

    for genre in genres:
        detected.extend(genre_tropes.get(genre, ()))

    # De-duplicate while preserving detection order; cap at 10 keywords.
    return list(dict.fromkeys(detected))[:10]
162
+
163
+
164
if __name__ == "__main__":
    # Smoke test: load a handful of rows and show the derived embedding text.
    df = load_anime_dataset(limit=10)
    for anime in iter_anime(df):
        print(f"{anime.mal_id}: {anime.title} ({anime.score}) - {anime.genres}")
        print(f" Embedding text: {create_embedding_text(anime)[:150]}...")
        print()
backend/data/database.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Database setup and models"""
2
+ import os
3
+ from datetime import datetime
4
+ from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, ForeignKey, Enum as SQLEnum
5
+ from sqlalchemy.ext.declarative import declarative_base
6
+ from sqlalchemy.orm import sessionmaker, relationship
7
+ import enum
8
+
9
+
10
# Database path: two dirname() calls climb from backend/data/database.py up
# to backend/, so aniverse.db lives alongside main.py.
DB_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "aniverse.db")
DATABASE_URL = f"sqlite:///{DB_PATH}"

# Create engine. check_same_thread=False lets the SQLite connection be used
# from threads other than the one that created it (needed when the web
# server dispatches requests on a thread pool -- TODO confirm the consumer).
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
18
+
19
+
20
class AnimeStatus(enum.Enum):
    """User's list status for an anime (reused for manga lists below)."""
    watching = "watching"
    completed = "completed"
    planned = "planned"
    dropped = "dropped"
    on_hold = "on_hold"
27
+
28
+
29
class User(Base):
    """User account row (table `users`)."""
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True, nullable=False)
    username = Column(String, unique=True, index=True, nullable=False)
    # Only the hash is stored; raw passwords never touch the database.
    password_hash = Column(String, nullable=False)
    # NOTE(review): datetime.utcnow produces naive timestamps and is
    # deprecated since Python 3.12 -- consider datetime.now(timezone.utc).
    created_at = Column(DateTime, default=datetime.utcnow)

    # One-to-many links to this user's anime/manga list entries.
    anime_list = relationship("UserAnime", back_populates="user")
    manga_list = relationship("UserManga", back_populates="user")
42
+
43
+
44
class UserAnime(Base):
    """One entry in a user's anime list (table `user_anime`)."""
    __tablename__ = "user_anime"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    # MAL ID of the anime; intentionally not a foreign key -- anime metadata
    # lives in the CSV/vector store, not in this database.
    anime_id = Column(Integer, nullable=False)
    status = Column(SQLEnum(AnimeStatus), default=AnimeStatus.planned)
    rating = Column(Float, nullable=True)   # 1-10 scale
    is_favorite = Column(Integer, default=0)  # SQLite has no bool; 0/1 int
    added_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Back-reference to the owning user.
    user = relationship("User", back_populates="anime_list")
59
+
60
+
61
class UserManga(Base):
    """One entry in a user's manga list (table `user_manga`)."""
    __tablename__ = "user_manga"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    manga_id = Column(Integer, nullable=False)  # MAL ID (no FK; see UserAnime)
    # Reuses AnimeStatus -- the watch/read states are identical.
    status = Column(SQLEnum(AnimeStatus), default=AnimeStatus.planned)
    rating = Column(Float, nullable=True)   # 1-10 scale
    is_favorite = Column(Integer, default=0)  # SQLite boolean as 0/1 int
    added_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Back-reference to the owning user.
    user = relationship("User", back_populates="manga_list")
76
+
77
+
78
def init_db():
    """Create all tables declared on `Base` (no-op for tables that exist)."""
    Base.metadata.create_all(bind=engine)
    print(f"Database initialized at {DB_PATH}")
82
+
83
+
84
def get_db():
    """Yield a database session, guaranteeing it is closed afterwards.

    Generator form suits dependency injection (FastAPI-style -- TODO confirm
    the consumer); the `finally` returns the session even when the request
    handler raises.
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
91
+
92
+
93
if __name__ == "__main__":
    # Allow `python database.py` to (re)create the schema directly.
    init_db()
    print("Database tables created successfully!")
backend/data/manga_loader.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load and process manga dataset"""
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from typing import Generator
5
+ import sys
6
+ sys.path.insert(0, str(Path(__file__).parent.parent))
7
+
8
+ from config import MANGA_DATASET_PATH
9
+ from data.manga_schema import Manga, parse_list_field
10
+
11
+
12
def load_manga_dataset(limit: int | None = None) -> pd.DataFrame:
    """Load the manga dataset from CSV.

    Args:
        limit: If given, read only the first `limit` rows.

    Returns:
        DataFrame with whitespace-stripped column names.
    """
    print(f"Loading manga dataset from {MANGA_DATASET_PATH}...")

    df = pd.read_csv(MANGA_DATASET_PATH, nrows=limit)

    # Clean up column names -- the source CSV headers carry stray whitespace.
    df.columns = df.columns.str.strip()

    print(f"Loaded {len(df)} manga entries")
    print(f"Columns: {df.columns.tolist()}")
    return df
24
+
25
+
26
def parse_manga_row(row: pd.Series) -> Manga:
    """Convert a DataFrame row into a `Manga` model.

    The MAL id is recovered from the `page_url` column when possible
    (URL shape: https://myanimelist.net/manga/<id>/<title>); otherwise the
    CSV's unnamed index column, then the row position, is used as a stand-in
    id. All numeric columns are parsed defensively: any unparsable value
    becomes None rather than raising.
    """
    # Extract mal_id from URL if available
    mal_id = None
    if pd.notna(row.get("page_url")):
        try:
            # URL format: https://myanimelist.net/manga/ID/title
            url = str(row["page_url"])
            parts = url.split("/manga/")
            if len(parts) > 1:
                mal_id = int(parts[1].split("/")[0])
        except (ValueError, IndexError):
            pass

    if mal_id is None:
        # Fall back to the unnamed index column, then to the row position.
        mal_id = int(row.get("Unnamed: 0", row.name)) if pd.notna(row.get("Unnamed: 0")) else row.name

    # Parse volumes -- non-digit markers (e.g. "-") stay None.
    volumes = None
    if pd.notna(row.get("Volumes")):
        try:
            vol_str = str(row["Volumes"]).strip()
            if vol_str.isdigit():
                volumes = int(vol_str)
        except ValueError:
            pass

    # Parse score
    score = None
    if pd.notna(row.get("Score")):
        try:
            score = float(row["Score"])
        except (ValueError, TypeError):
            pass

    # Parse members -- may contain thousands separators such as "1,234".
    members = None
    if pd.notna(row.get("Members")):
        try:
            members = int(str(row["Members"]).replace(",", ""))
        except (ValueError, TypeError):
            pass

    # Parse rank
    rank = None
    if pd.notna(row.get("Rank")):
        try:
            rank = int(row["Rank"])
        except (ValueError, TypeError):
            pass

    return Manga(
        mal_id=mal_id,
        title=str(row.get("Title", "Unknown")).strip(),
        media_type=str(row.get("Type", "Manga")).strip().lower() if pd.notna(row.get("Type")) else "manga",
        volumes=volumes,
        score=score,
        rank=rank,
        members=members,
        published=str(row.get("Published")) if pd.notna(row.get("Published")) else None,
        genres=parse_list_field(row.get("Genres", "[]")),
        authors=parse_list_field(row.get("Authors", "[]")),
        image_url=str(row.get("image_url")) if pd.notna(row.get("image_url")) else None,
    )
91
+
92
+
93
def iter_manga(df: pd.DataFrame) -> Generator[Manga, None, None]:
    """Yield each DataFrame row as a validated ``Manga`` model.

    A row that fails to parse is reported to stdout and skipped so a single
    malformed entry cannot abort a whole dataset pass.
    """
    for _, record in df.iterrows():
        try:
            parsed = parse_manga_row(record)
        except Exception as err:  # deliberately broad: skip any bad row
            print(f"Error parsing manga row {record.get('Title', 'unknown')}: {err}")
            continue
        yield parsed
101
+
102
+
103
def create_manga_embedding_text(manga: Manga) -> str:
    """Build the pipe-separated text that is embedded for this manga.

    Layout: title | Genres: ... | Type: ... | Authors: ... | synopsis, where
    every segment after the title is optional. Only the first three authors
    are listed and the synopsis is capped at 1000 characters to keep the
    embedding input bounded.
    """
    segments = [manga.title]

    genre_list = manga.genres
    if genre_list:
        segments.append("Genres: " + ", ".join(genre_list))

    if manga.media_type:
        segments.append("Type: " + manga.media_type)

    author_list = manga.authors
    if author_list:
        segments.append("Authors: " + ", ".join(author_list[:3]))

    synopsis_text = manga.synopsis
    if synopsis_text:
        segments.append(synopsis_text[:1000])

    return " | ".join(segments)
121
+
122
+
123
if __name__ == "__main__":
    # Smoke test: parse a few rows and print the key fields.
    df = load_manga_dataset(limit=10)
    for manga in iter_manga(df):
        print(f"{manga.mal_id}: {manga.title} (Score: {manga.score}) - {manga.genres}")
        print(f" Type: {manga.media_type}, Volumes: {manga.volumes}")
        print()
backend/data/manga_schema.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Manga data schema"""
2
+ from pydantic import BaseModel
3
+ from typing import Optional
4
+ import ast
5
+
6
+
7
class Manga(BaseModel):
    """Manga entry with MAL-style fields.

    Only `mal_id` and `title` are required; the CSV source is sparse, so
    everything else is optional or defaulted.
    """
    mal_id: int
    title: str
    title_english: Optional[str] = None
    media_type: str = "manga"  # manga, manhwa, manhua, novel, light_novel
    volumes: Optional[int] = None
    chapters: Optional[int] = None
    status: Optional[str] = None  # publishing, finished
    score: Optional[float] = None
    scored_by: Optional[int] = None
    rank: Optional[int] = None
    popularity: Optional[int] = None
    members: Optional[int] = None
    favorites: Optional[int] = None
    synopsis: Optional[str] = None
    # Mutable defaults are safe: pydantic copies them per instance.
    genres: list[str] = []
    authors: list[str] = []
    image_url: Optional[str] = None
    published: Optional[str] = None  # raw publication date-range string from the CSV
27
+
28
+
29
def parse_list_field(value) -> list[str]:
    """Parse a CSV cell into a list of strings.

    Accepts an existing list (returned unchanged), a Python-literal list
    string ("['A', 'B']"), comma-separated text ("A, B"), or a bare value.
    None, empty strings, float NaN, and non-string scalars all yield [].
    """
    # pandas encodes missing cells as float NaN, which is truthy.
    if not value or (isinstance(value, float) and str(value) == 'nan'):
        return []

    if isinstance(value, list):
        return value

    if not isinstance(value, str):
        return []

    text = value.strip()

    # Python-literal list format takes priority; on a parse failure we fall
    # through to the plain-text handling below.
    if text.startswith('['):
        try:
            literal = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            pass
        else:
            return [str(entry) for entry in literal if entry]

    if ',' in text:
        return [piece.strip() for piece in text.split(',') if piece.strip()]

    return [text] if text else []
backend/embeddings/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Embeddings module
backend/embeddings/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (136 Bytes). View file
 
backend/embeddings/__pycache__/chroma_store.cpython-313.pyc ADDED
Binary file (5.84 kB). View file
 
backend/embeddings/__pycache__/manga_chroma_store.cpython-313.pyc ADDED
Binary file (5.88 kB). View file
 
backend/embeddings/__pycache__/search_utils.cpython-313.pyc ADDED
Binary file (4.81 kB). View file
 
backend/embeddings/build_embeddings.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Build anime embeddings and populate vector store"""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ sys.path.insert(0, str(Path(__file__).parent.parent))
6
+
7
+ from data.data_loader import load_anime_dataset, iter_anime, create_embedding_text
8
+ from embeddings.chroma_store import get_vector_store
9
+
10
+
11
def build_embeddings(limit: int = None, batch_size: int = 100):
    """Embed every usable anime entry and persist it to ChromaDB.

    Entries with a missing or very short synopsis (< 20 chars) are skipped
    because they would produce poor-quality embeddings.

    Args:
        limit: Optional cap on how many CSV rows to load.
        batch_size: Batch size forwarded to the vector store's add_batch.
    """
    print("=" * 50)
    print("AniVerse Embedding Builder")
    print("=" * 50)

    frame = load_anime_dataset(limit=limit)

    store = get_vector_store()
    existing_count = store.get_count()
    print(f"Existing entries in vector store: {existing_count}")

    # Accumulators for one batched insert into ChromaDB.
    entry_ids = []
    entry_texts = []
    entry_meta = []

    print("Processing anime entries...")
    for entry in iter_anime(frame):
        # Guard clause: drop entries whose synopsis is absent or too short.
        synopsis = entry.synopsis
        if not synopsis or len(synopsis) < 20:
            continue

        entry_ids.append(entry.mal_id)
        entry_texts.append(create_embedding_text(entry))
        # Chroma metadata: None is replaced by 0/"" fallbacks.
        entry_meta.append({
            "title": entry.title,
            "score": entry.score or 0,
            "genres": ", ".join(entry.genres) if entry.genres else "",
            "media_type": entry.media_type,
            "status": entry.status,
            "image_url": entry.image_url or "",
        })

    print(f"Prepared {len(entry_ids)} anime entries for embedding")

    print("Generating embeddings and storing in ChromaDB...")
    store.add_batch(entry_ids, entry_texts, entry_meta, batch_size=batch_size)

    print("=" * 50)
    print(f"Complete! Vector store now has {store.get_count()} entries")
    print("=" * 50)
56
+
57
+
58
if __name__ == "__main__":
    # CLI entry point: `python build_embeddings.py [--limit N] [--batch-size B]`
    import argparse

    parser = argparse.ArgumentParser(description="Build anime embeddings")
    parser.add_argument("--limit", type=int, default=None, help="Limit number of entries to process")
    parser.add_argument("--batch-size", type=int, default=100, help="Batch size for embedding generation")

    args = parser.parse_args()
    build_embeddings(limit=args.limit, batch_size=args.batch_size)
backend/embeddings/build_manga_embeddings.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Build manga embeddings and store in ChromaDB"""
2
+ import sys
3
+ from pathlib import Path
4
+ sys.path.insert(0, str(Path(__file__).parent.parent))
5
+
6
+ from data.manga_loader import load_manga_dataset, iter_manga, create_manga_embedding_text
7
+ from embeddings.manga_chroma_store import MangaVectorStore
8
+
9
+
10
def build_manga_embeddings(limit: int | None = None) -> None:
    """Build embeddings for manga dataset.

    Args:
        limit: Maximum number of dataset rows to load (None = all).
    """
    print("="*50)
    print("Building Manga Embeddings")
    print("="*50)

    # Load dataset
    df = load_manga_dataset(limit=limit)

    # Initialize vector store
    store = MangaVectorStore()

    # Collect data for batch insert
    ids = []
    texts = []
    metadatas = []

    print("\nProcessing manga entries...")
    for manga in iter_manga(df):
        embedding_text = create_manga_embedding_text(manga)

        # Flatten list fields to strings: ChromaDB metadata values must be scalar.
        metadata = {
            "title": manga.title,
            "media_type": manga.media_type or "manga",
            "score": manga.score or 0,
            "rank": manga.rank or 0,
            "members": manga.members or 0,
            "volumes": manga.volumes or 0,
            "genres": ", ".join(manga.genres) if manga.genres else "",
            "authors": ", ".join(manga.authors[:3]) if manga.authors else "",
            "image_url": manga.image_url or "",
            "published": manga.published or "",
        }

        ids.append(manga.mal_id)
        texts.append(embedding_text)
        metadatas.append(metadata)

    print(f"\nAdding {len(ids)} manga to vector store...")
    store.add_batch(ids, texts, metadatas, batch_size=100)

    print(f"\n✓ Successfully indexed {store.get_count()} manga entries!")
    print("="*50)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Build manga embeddings")
    parser.add_argument("--limit", type=int, help="Limit number of entries")
    args = parser.parse_args()

    build_manga_embeddings(limit=args.limit)
backend/embeddings/chroma_store.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ChromaDB Vector Store for Anime Similarity Search"""
2
+ import chromadb
3
+ from chromadb.config import Settings
4
+ from typing import Optional
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ sys.path.insert(0, str(Path(__file__).parent.parent))
9
+ from config import CHROMA_DB_PATH, EMBEDDING_MODEL
10
+
11
+
12
class AnimeVectorStore:
    """Vector database for anime semantic search (ChromaDB-backed).

    Documents are keyed by MAL id (stored as strings); the collection uses
    cosine distance so similarity is reported as 1 - distance.
    """

    def __init__(self, persist_directory: str | None = None):
        """Open or create the persistent "anime" collection.

        Args:
            persist_directory: On-disk ChromaDB path; defaults to
                CHROMA_DB_PATH from config.
        """
        self.persist_dir = persist_directory or str(CHROMA_DB_PATH)

        try:
            # Initialize ChromaDB client with telemetry disabled
            self.client = chromadb.PersistentClient(
                path=self.persist_dir,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )

            # Use sentence-transformers for embeddings (more compatible than onnxruntime)
            from chromadb.utils import embedding_functions
            self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=EMBEDDING_MODEL
            )

            # Cosine space so a distance d maps to similarity 1 - d below.
            self.collection = self.client.get_or_create_collection(
                name="anime",
                metadata={"hnsw:space": "cosine"},
                embedding_function=self.embedding_fn
            )

            print(f"Vector store initialized at {self.persist_dir}")
            print(f"Collection count: {self.collection.count()}")
        except Exception as e:
            # Surface the full traceback, then re-raise: callers cannot
            # function without the vector store.
            print(f"ERROR initializing vector store: {e}")
            import traceback
            traceback.print_exc()
            raise

    def add_anime(
        self,
        mal_id: int,
        embedding_text: str,
        metadata: dict
    ) -> None:
        """Add or update a single anime entry (embedding auto-generated)."""
        self.collection.upsert(
            ids=[str(mal_id)],
            documents=[embedding_text],
            metadatas=[metadata]
        )

    def add_batch(
        self,
        ids: list[int],
        texts: list[str],
        metadatas: list[dict],
        batch_size: int = 100
    ) -> None:
        """Upsert multiple anime entries in batches of `batch_size`.

        Args:
            ids: MAL ids (converted to strings for ChromaDB).
            texts: Embedding source texts, parallel to `ids`.
            metadatas: Per-entry metadata dicts, parallel to `ids`.
            batch_size: Entries per upsert call.
        """
        total = len(ids)
        for i in range(0, total, batch_size):
            batch_ids = [str(id_) for id_ in ids[i:i+batch_size]]
            batch_texts = texts[i:i+batch_size]
            batch_meta = metadatas[i:i+batch_size]

            self.collection.upsert(
                ids=batch_ids,
                documents=batch_texts,
                metadatas=batch_meta
            )

            print(f" Added {min(i+batch_size, total)}/{total} entries...")

    def search(
        self,
        query: str,
        n_results: int = 10,
        where: Optional[dict] = None
    ) -> list[dict]:
        """Search for similar anime by text query.

        Args:
            query: Free-text query (embedded automatically by ChromaDB).
            n_results: Maximum hits to return.
            where: Optional ChromaDB metadata filter.

        Returns:
            List of dicts with mal_id, metadata, document, similarity.
        """
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
            where=where,
            include=["metadatas", "documents", "distances"]
        )

        # Format results
        formatted = []
        for i, mal_id in enumerate(results["ids"][0]):
            formatted.append({
                "mal_id": int(mal_id),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]  # Convert distance to similarity
            })

        return formatted

    def search_similar(
        self,
        mal_id: int,
        n_results: int = 10
    ) -> list[dict]:
        """Find anime similar to a given anime by MAL ID.

        Returns an empty list if the id is not in the collection.
        """
        # Get the anime's document to use as the query text
        result = self.collection.get(
            ids=[str(mal_id)],
            include=["documents"]
        )

        if not result["documents"]:
            return []

        # Query with that document; fetch one extra so the anime itself
        # can be dropped from its own results.
        results = self.collection.query(
            query_texts=result["documents"],
            n_results=n_results + 1,
            include=["metadatas", "documents", "distances"]
        )

        # Format and exclude self
        formatted = []
        for i, id_ in enumerate(results["ids"][0]):
            if int(id_) == mal_id:
                continue
            formatted.append({
                "mal_id": int(id_),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted[:n_results]

    def get_count(self) -> int:
        """Get total number of entries in the collection"""
        return self.collection.count()
151
+
152
+
153
# Singleton instance (module-level so the embedding model loads once per process)
_store: Optional[AnimeVectorStore] = None


def get_vector_store() -> AnimeVectorStore:
    """Get or create vector store instance.

    Lazy singleton: the ChromaDB client and sentence-transformers model
    are only initialized on first call.
    """
    global _store
    if _store is None:
        _store = AnimeVectorStore()
    return _store
backend/embeddings/manga_chroma_store.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ChromaDB Vector Store for Manga Similarity Search"""
2
+ import chromadb
3
+ from chromadb.config import Settings
4
+ from chromadb.utils import embedding_functions
5
+ from typing import Optional
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ sys.path.insert(0, str(Path(__file__).parent.parent))
10
+ from config import MANGA_CHROMA_DB_PATH, EMBEDDING_MODEL
11
+
12
+
13
class MangaVectorStore:
    """Vector database for manga semantic search (ChromaDB-backed).

    Mirrors AnimeVectorStore but persists to MANGA_CHROMA_DB_PATH and
    uses a "manga" collection. Documents are keyed by MAL id strings.
    """

    def __init__(self, persist_directory: str | None = None):
        """Open or create the persistent "manga" collection.

        Args:
            persist_directory: On-disk ChromaDB path; defaults to
                MANGA_CHROMA_DB_PATH from config.
        """
        self.persist_dir = persist_directory or str(MANGA_CHROMA_DB_PATH)

        try:
            # Initialize ChromaDB client
            self.client = chromadb.PersistentClient(
                path=self.persist_dir,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )

            # Use sentence-transformers for embeddings (more compatible)
            self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=EMBEDDING_MODEL
            )

            # Cosine space so a distance d maps to similarity 1 - d below.
            self.collection = self.client.get_or_create_collection(
                name="manga",
                metadata={"hnsw:space": "cosine"},
                embedding_function=self.embedding_fn
            )

            print(f"Manga vector store initialized at {self.persist_dir}")
            print(f"Manga collection count: {self.collection.count()}")
        except Exception as e:
            # Surface the full traceback, then re-raise.
            print(f"ERROR initializing manga vector store: {e}")
            import traceback
            traceback.print_exc()
            raise

    def add_manga(
        self,
        mal_id: int,
        embedding_text: str,
        metadata: dict
    ) -> None:
        """Add or update a single manga entry (embedding auto-generated)."""
        self.collection.upsert(
            ids=[str(mal_id)],
            documents=[embedding_text],
            metadatas=[metadata]
        )

    def add_batch(
        self,
        ids: list[int],
        texts: list[str],
        metadatas: list[dict],
        batch_size: int = 100
    ) -> None:
        """Upsert multiple manga entries in batches of `batch_size`."""
        total = len(ids)
        for i in range(0, total, batch_size):
            batch_ids = [str(id_) for id_ in ids[i:i+batch_size]]
            batch_texts = texts[i:i+batch_size]
            batch_meta = metadatas[i:i+batch_size]

            self.collection.upsert(
                ids=batch_ids,
                documents=batch_texts,
                metadatas=batch_meta
            )

            print(f" Added {min(i+batch_size, total)}/{total} manga entries...")

    def search(
        self,
        query: str,
        n_results: int = 10,
        where: Optional[dict] = None
    ) -> list[dict]:
        """Search for similar manga by text query.

        Returns a list of dicts with mal_id, metadata, document, similarity.
        """
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
            where=where,
            include=["metadatas", "documents", "distances"]
        )

        formatted = []
        for i, mal_id in enumerate(results["ids"][0]):
            formatted.append({
                "mal_id": int(mal_id),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted

    def search_similar(
        self,
        mal_id: int,
        n_results: int = 10
    ) -> list[dict]:
        """Find manga similar to a given manga by MAL ID.

        Returns an empty list if the id is not in the collection.
        """
        result = self.collection.get(
            ids=[str(mal_id)],
            include=["documents"]
        )

        if not result["documents"]:
            return []

        # Fetch one extra result so the manga itself can be excluded.
        results = self.collection.query(
            query_texts=result["documents"],
            n_results=n_results + 1,
            include=["metadatas", "documents", "distances"]
        )

        formatted = []
        for i, id_ in enumerate(results["ids"][0]):
            if int(id_) == mal_id:
                continue
            formatted.append({
                "mal_id": int(id_),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted[:n_results]

    def get_count(self) -> int:
        """Get total number of entries in the collection"""
        return self.collection.count()
145
+
146
+
147
+ # Singleton instance
148
+ _manga_store: Optional[MangaVectorStore] = None
149
+
150
+
151
+ def get_manga_vector_store() -> MangaVectorStore:
152
+ """Get or create manga vector store instance"""
153
+ global _manga_store
154
+ if _manga_store is None:
155
+ _manga_store = MangaVectorStore()
156
+ return _manga_store
backend/embeddings/search_utils.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Search utilities for improved ranking and filtering"""
2
+ from typing import Optional
3
+
4
+
5
+ def calculate_combined_score(
6
+ similarity: float,
7
+ anime_score: float,
8
+ popularity: int = None,
9
+ weight_similarity: float = 0.6,
10
+ weight_anime_score: float = 0.3,
11
+ weight_popularity: float = 0.1
12
+ ) -> float:
13
+ """
14
+ Calculate a combined ranking score.
15
+
16
+ Args:
17
+ similarity: Vector similarity (0-1)
18
+ anime_score: MAL score (0-10)
19
+ popularity: Popularity rank (lower is better)
20
+ weight_*: Weights for each factor
21
+
22
+ Returns:
23
+ Combined score (0-1)
24
+ """
25
+ # Normalize anime score to 0-1
26
+ normalized_score = (anime_score or 0) / 10
27
+
28
+ # Normalize popularity (inverse, since lower rank = more popular)
29
+ normalized_pop = 0.5 # Default if not available
30
+ if popularity and popularity > 0:
31
+ # Map rank 1-1000 to 1-0.5, rank > 1000 to 0.5-0.1
32
+ if popularity <= 1000:
33
+ normalized_pop = 1 - (popularity / 2000)
34
+ else:
35
+ normalized_pop = max(0.1, 0.5 - (popularity - 1000) / 20000)
36
+
37
+ combined = (
38
+ weight_similarity * similarity +
39
+ weight_anime_score * normalized_score +
40
+ weight_popularity * normalized_pop
41
+ )
42
+
43
+ return round(combined, 4)
44
+
45
+
46
+ def rerank_results(results: list[dict], limit: int = 15) -> list[dict]:
47
+ """
48
+ Rerank search results using combined scoring.
49
+
50
+ Args:
51
+ results: List of search results with metadata
52
+ limit: Max results to return
53
+
54
+ Returns:
55
+ Reranked and limited results
56
+ """
57
+ for r in results:
58
+ r["combined_score"] = calculate_combined_score(
59
+ similarity=r.get("similarity", 0),
60
+ anime_score=r.get("metadata", {}).get("score", 0),
61
+ popularity=r.get("metadata", {}).get("popularity")
62
+ )
63
+
64
+ # Sort by combined score
65
+ reranked = sorted(results, key=lambda x: x["combined_score"], reverse=True)
66
+
67
+ return reranked[:limit]
68
+
69
+
70
+ def build_genre_filter(genres: list[str]) -> dict:
71
+ """Build ChromaDB where filter for genres"""
72
+ if not genres:
73
+ return None
74
+
75
+ # ChromaDB uses $contains for partial string match
76
+ if len(genres) == 1:
77
+ return {"genres": {"$contains": genres[0]}}
78
+
79
+ # Multiple genres: any match
80
+ return {"$or": [{"genres": {"$contains": g}} for g in genres]}
81
+
82
+
83
def extract_keywords(query: str) -> list[str]:
    """Extract important keywords from search query"""
    # Filler words that carry no signal for anime search
    stop_words = {
        "anime", "like", "similar", "to", "with", "the", "a", "an", "and", "or",
        "that", "has", "have", "good", "best", "top", "show", "series", "want",
        "looking", "for", "something", "recommend", "me", "please", "i", "my"
    }

    keywords = []
    for raw_word in query.lower().split():
        cleaned = raw_word.strip(",.!?")
        if cleaned not in stop_words:
            keywords.append(cleaned)

    return keywords
96
+
97
+
98
# Genre keyword mappings for better matching
GENRE_KEYWORDS = {
    "action": ["action", "fight", "battle", "combat", "war"],
    "romance": ["romance", "love", "romantic", "relationship", "dating"],
    "comedy": ["comedy", "funny", "humor", "hilarious", "laugh"],
    "drama": ["drama", "emotional", "feels", "sad", "tear"],
    "horror": ["horror", "scary", "terrifying", "creepy", "dark"],
    "psychological": ["psychological", "mind", "mental", "thriller", "mindbending"],
    "slice of life": ["slice of life", "daily", "everyday", "relaxing", "wholesome"],
    "fantasy": ["fantasy", "magic", "wizard", "isekai", "magical"],
    "sci-fi": ["sci-fi", "scifi", "science fiction", "future", "space", "mecha"],
    "sports": ["sports", "basketball", "soccer", "volleyball", "baseball"],
    "mystery": ["mystery", "detective", "investigation", "whodunit"],
    "supernatural": ["supernatural", "ghost", "spirit", "demon", "paranormal"],
}


def detect_genres_from_query(query: str) -> list[str]:
    """Detect genre preferences from natural language query"""
    lowered = query.lower()

    # A genre is detected when any of its trigger keywords appears as a
    # substring of the query; results keep GENRE_KEYWORDS insertion order.
    detected = [
        genre.title()
        for genre, keywords in GENRE_KEYWORDS.items()
        if any(keyword in lowered for keyword in keywords)
    ]

    return detected
backend/llm/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # LLM module
backend/llm/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (129 Bytes). View file
 
backend/llm/__pycache__/groq_client.cpython-313.pyc ADDED
Binary file (6.37 kB). View file
 
backend/llm/groq_client.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Groq LLM Client for AI Recommendations"""
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ sys.path.insert(0, str(Path(__file__).parent.parent))
7
+ from config import GROQ_API_KEY, LLM_MODEL
8
+
9
+ try:
10
+ from groq import Groq
11
+ except ImportError:
12
+ Groq = None
13
+
14
+
15
# System prompt prepended to every chat completion; defines the assistant's
# persona, recommendation rules, and output format.
# NOTE: this is a runtime string consumed by the LLM — edit wording with care.
SYSTEM_PROMPT = """You are AniVerse AI, an expert anime and manga recommendation assistant.

## YOUR CORE MISSION
Provide HIGHLY RELEVANT, PRECISE recommendations. Quality over quantity. Every suggestion must directly address what the user is looking for.

## RECOMMENDATION RULES
1. **Match the Query Exactly**: If user asks for "dark fantasy", recommend dark fantasy - not action comedy.
2. **Use Context Wisely**: Reference the "Relevant Anime/Manga" data provided. These are semantically matched to the query.
3. **Explain Your Picks**: For EACH recommendation, give 1-2 sentences on WHY it fits the request.
4. **Limit Recommendations**: Suggest 2-4 titles max per response. Be selective.
5. **Format Clearly**: Use bold for titles, include scores and genres inline.

## PERSONALIZATION (When User Profile Available)
- Reference their high-rated titles: "Since you gave Attack on Titan a 9..."
- Avoid genres from low-rated shows
- Connect new suggestions to their favorites

## RESPONSE FORMAT
When recommending, use this structure:
**[Title]** (★ score/10) - [Brief reason why this matches their request]

## GUIDELINES
- Be enthusiastic but concise
- No spoilers
- If the context doesn't have good matches, say so honestly
- You can discuss plots, characters, and themes
- Support both anime AND manga recommendations

Context about relevant titles will be provided below."""
44
+
45
+
46
class GroqClient:
    """Groq LLM client for AI-powered recommendations.

    Thin wrapper around the Groq chat-completions API with helpers for
    recommendation chat, review summarization, and match explanations.
    """

    def __init__(self):
        # Fail fast with actionable messages when the optional dependency
        # or the API key is missing.
        if not Groq:
            raise ImportError("groq package not installed. Run: pip install groq")

        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not set. Add it to your .env file")

        self.client = Groq(api_key=GROQ_API_KEY)
        self.model = LLM_MODEL

    def chat(
        self,
        user_message: str,
        context: str = "",
        history: list[dict] | None = None,
        max_tokens: int = 1024
    ) -> str:
        """Send a chat message and return the assistant's reply.

        Args:
            user_message: The user's current message.
            context: Optional retrieved anime/manga data, injected as a
                second system message.
            history: Prior turns as {"role": ..., "content": ...} dicts.
            max_tokens: Cap on the response length.
        """
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]

        # Add context if provided
        if context:
            messages.append({
                "role": "system",
                "content": f"Here is relevant anime data from our database:\n\n{context}"
            })

        # Add conversation history
        if history:
            messages.extend(history)

        # Add current user message
        messages.append({"role": "user", "content": user_message})

        # Call Groq API
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=0.7,
        )

        return response.choices[0].message.content

    def summarize_reviews(
        self,
        reviews: list[str],
        anime_title: str
    ) -> dict:
        """Summarize multiple reviews into pros/cons.

        Returns the dict parsed from the model's JSON reply; on parse
        failure, falls back to a dict carrying the raw reply in "summary".
        """
        reviews_text = "\n---\n".join(reviews[:10])  # Limit to 10 reviews

        prompt = f"""Analyze these reviews for "{anime_title}" and provide:
1. Overall sentiment (positive/negative/mixed)
2. Top 3 pros (things reviewers loved)
3. Top 3 cons (things reviewers criticized)
4. A 2-3 sentence summary
5. Aspect scores (1-10) for: story, animation, characters, music, enjoyment

Reviews:
{reviews_text}

Respond in JSON format:
{{
"sentiment": "positive|negative|mixed",
"pros": ["pro1", "pro2", "pro3"],
"cons": ["con1", "con2", "con3"],
"summary": "...",
"aspects": {{"story": 8, "animation": 9, ...}}
}}"""

        response = self.chat(prompt, max_tokens=512)

        # Parse JSON response (with fallback). Models frequently wrap JSON
        # in a markdown code fence, so strip one before parsing.
        import json
        raw = response.strip()
        if raw.startswith("```"):
            raw = raw.strip("`").strip()
            if raw.lower().startswith("json"):
                raw = raw[4:]
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return {
                "sentiment": "mixed",
                "pros": [],
                "cons": [],
                "summary": response,
                "aspects": {}
            }

    def generate_recommendation_reason(
        self,
        user_query: str,
        anime_data: dict
    ) -> str:
        """Generate a personalized reason why an anime matches the user's request"""
        prompt = f"""The user asked: "{user_query}"

This anime was matched:
- Title: {anime_data.get('title', 'Unknown')}
- Genres: {anime_data.get('genres', 'Unknown')}
- Score: {anime_data.get('score', 'N/A')}

In 1-2 sentences, explain why this anime matches what the user is looking for. Be specific about the connection."""

        return self.chat(prompt, max_tokens=150)
151
+
152
+
153
# Singleton (constructed lazily so missing groq/API key fails on first use,
# not at module import)
_client: Optional[GroqClient] = None


def get_llm_client() -> GroqClient:
    """Get or create LLM client instance.

    Raises ImportError / ValueError from GroqClient.__init__ when the
    groq package or GROQ_API_KEY is unavailable.
    """
    global _client
    if _client is None:
        _client = GroqClient()
    return _client
backend/main.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AniVerse API - Main Entry Point"""
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ # Add backend to path
8
+ sys.path.insert(0, str(Path(__file__).parent))
9
+
10
+ from routes import search, chat, anime, auth, lists, recommendations, mal_import, manga
11
+
12
# Create FastAPI app
app = FastAPI(
    title="AniVerse API",
    description="AI-powered anime & manga discovery platform with semantic search, personalized recommendations, and user lists",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS middleware - Allow ALL origins for cross-domain requests.
# NOTE(review): with wildcard origins the CORS spec forbids credentials,
# hence allow_credentials=False — auth must use headers/tokens, not cookies.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,  # Must be False when using wildcard
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — each owns one /api/* namespace (see routes/ package)
app.include_router(search.router)
app.include_router(chat.router)
app.include_router(anime.router)
app.include_router(auth.router)
app.include_router(lists.router)
app.include_router(recommendations.router)
app.include_router(mal_import.router)
app.include_router(manga.router)
39
+
40
+
41
@app.get("/")
async def root():
    """API root - health check and info"""
    endpoint_map = {
        "docs": "/docs",
        "search": "/api/search",
        "chat": "/api/chat",
        "anime": "/api/anime",
        "manga": "/api/manga",
        "auth": "/api/auth",
        "lists": "/api/lists",
        "recommendations": "/api/recommendations",
    }
    return {
        "name": "AniVerse API",
        "version": "2.0.0",
        "status": "running",
        "endpoints": endpoint_map,
    }
59
+
60
+
61
@app.get("/api/health")
async def health_check():
    """Health check endpoint for Docker/k8s"""
    payload = {"status": "healthy"}
    return payload
65
+
66
+
67
@app.get("/api/stats")
async def get_stats():
    """Get database statistics"""
    # Imports kept local: the vector stores and pandas are heavyweight and
    # only needed by this endpoint.
    import pandas as pd

    from config import DATASET_PATH
    from embeddings.chroma_store import get_vector_store
    from embeddings.manga_chroma_store import get_manga_vector_store

    anime_index = get_vector_store()
    manga_index = get_manga_vector_store()
    dataset = pd.read_csv(DATASET_PATH)

    return {
        "total_anime": len(dataset),
        "indexed_anime": anime_index.get_count(),
        "indexed_manga": manga_index.get_count(),
    }
84
+
85
+
86
if __name__ == "__main__":
    import uvicorn
    # reload=True is for local development only; run via a process manager
    # (e.g. `uvicorn main:app`) in production.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a055086a635d2cacb3a426111c19a9b788478cd9a55baaea33036c6cbf5b2b13
3
+ size 29851236
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fdf8dfc32fae317f2cff4ec0a5f920a6591ff14b0f8721ad6c584b713d592dd
3
+ size 100
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f71f0b2f29d673b4c6a282b9af736e2a498d132f162c4840b3d9aeb1501c89
3
+ size 535698
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b4cc8354a23c80c0abaf4485b174ea8bd44dc51c76c5d0acc1218cc1173df6e
3
+ size 71244
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c442cbb882eeb83457c7747aff45504845f000234c962b652e33dc5c779cb82
3
+ size 157876
backend/manga_chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e98a1267fe754f0ebf174c81cc5fe9823375d1fc66d7f78d462550a9ec5d68
3
+ size 29241344