Spaces:
Sleeping
Sleeping
Upload 72 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +2 -0
- backend/.dockerignore +21 -0
- backend/.env.example +29 -0
- backend/.python-version +1 -0
- backend/Dockerfile +28 -0
- backend/README.md +8 -0
- backend/__pycache__/config.cpython-313.pyc +0 -0
- backend/__pycache__/main.cpython-313.pyc +0 -0
- backend/__pycache__/security.cpython-313.pyc +0 -0
- backend/aniverse.db +0 -0
- backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/data_level0.bin +3 -0
- backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/header.bin +3 -0
- backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/index_metadata.pickle +3 -0
- backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/length.bin +3 -0
- backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/link_lists.bin +3 -0
- backend/chroma_db/chroma.sqlite3 +3 -0
- backend/config.py +33 -0
- backend/data/__init__.py +1 -0
- backend/data/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/data/__pycache__/anime_schema.cpython-313.pyc +0 -0
- backend/data/__pycache__/data_loader.cpython-313.pyc +0 -0
- backend/data/__pycache__/database.cpython-313.pyc +0 -0
- backend/data/__pycache__/manga_loader.cpython-313.pyc +0 -0
- backend/data/__pycache__/manga_schema.cpython-313.pyc +0 -0
- backend/data/anime_schema.py +68 -0
- backend/data/data_loader.py +170 -0
- backend/data/database.py +95 -0
- backend/data/manga_loader.py +129 -0
- backend/data/manga_schema.py +52 -0
- backend/embeddings/__init__.py +1 -0
- backend/embeddings/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/embeddings/__pycache__/chroma_store.cpython-313.pyc +0 -0
- backend/embeddings/__pycache__/manga_chroma_store.cpython-313.pyc +0 -0
- backend/embeddings/__pycache__/search_utils.cpython-313.pyc +0 -0
- backend/embeddings/build_embeddings.py +66 -0
- backend/embeddings/build_manga_embeddings.py +61 -0
- backend/embeddings/chroma_store.py +162 -0
- backend/embeddings/manga_chroma_store.py +156 -0
- backend/embeddings/search_utils.py +126 -0
- backend/llm/__init__.py +1 -0
- backend/llm/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/llm/__pycache__/groq_client.cpython-313.pyc +0 -0
- backend/llm/groq_client.py +162 -0
- backend/main.py +88 -0
- backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/data_level0.bin +3 -0
- backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/header.bin +3 -0
- backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/index_metadata.pickle +3 -0
- backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/length.bin +3 -0
- backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/link_lists.bin +3 -0
- backend/manga_chroma_db/chroma.sqlite3 +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
backend/chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
backend/manga_chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
backend/.dockerignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*$py.class
|
| 4 |
+
*.so
|
| 5 |
+
.Python
|
| 6 |
+
.env
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
ENV/
|
| 10 |
+
*.egg-info/
|
| 11 |
+
.eggs/
|
| 12 |
+
dist/
|
| 13 |
+
build/
|
| 14 |
+
*.egg
|
| 15 |
+
.git
|
| 16 |
+
.gitignore
|
| 17 |
+
.pytest_cache/
|
| 18 |
+
.mypy_cache/
|
| 19 |
+
*.log
|
| 20 |
+
.DS_Store
|
| 21 |
+
Thumbs.db
|
backend/.env.example
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment Configuration Template
|
| 2 |
+
# Copy this to .env and fill in your values
|
| 3 |
+
|
| 4 |
+
# Required: Groq API Key for AI chat
|
| 5 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 6 |
+
|
| 7 |
+
# Optional: MyAnimeList API (for MAL import feature)
|
| 8 |
+
MAL_CLIENT_ID=your_mal_client_id
|
| 9 |
+
MAL_CLIENT_SECRET=your_mal_client_secret
|
| 10 |
+
|
| 11 |
+
# Server Configuration (defaults work for development)
|
| 12 |
+
HOST=0.0.0.0
|
| 13 |
+
PORT=8000
|
| 14 |
+
|
| 15 |
+
# LLM Model (default: llama-3.1-8b-instant)
|
| 16 |
+
LLM_MODEL=llama-3.1-8b-instant
|
| 17 |
+
|
| 18 |
+
# Database Path (default: ./aniverse.db)
|
| 19 |
+
DATABASE_PATH=./aniverse.db
|
| 20 |
+
|
| 21 |
+
# ChromaDB Paths (defaults work for Docker)
|
| 22 |
+
CHROMA_DB_PATH=./data/chroma_db
|
| 23 |
+
MANGA_CHROMA_DB_PATH=./data/manga_chroma_db
|
| 24 |
+
|
| 25 |
+
# CORS Origins (comma-separated for production)
|
| 26 |
+
CORS_ORIGINS=http://localhost:5500,http://localhost:3000
|
| 27 |
+
|
| 28 |
+
# Production Mode (set to 'true' for production)
|
| 29 |
+
PRODUCTION=false
|
backend/.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.11.0
|
backend/Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AniVerse Backend - Hugging Face Spaces Dockerfile
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
gcc \
|
| 10 |
+
g++ \
|
| 11 |
+
curl \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
# Copy requirements first for caching
|
| 15 |
+
COPY requirements.txt .
|
| 16 |
+
|
| 17 |
+
# Install Python dependencies
|
| 18 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 19 |
+
|
| 20 |
+
# Copy application code
|
| 21 |
+
COPY . .
|
| 22 |
+
|
| 23 |
+
# Set environment variables
|
| 24 |
+
ENV PYTHONUNBUFFERED=1
|
| 25 |
+
ENV PORT=7860
|
| 26 |
+
|
| 27 |
+
# Download data on startup and run server
|
| 28 |
+
CMD python setup_data.py && uvicorn main:app --host 0.0.0.0 --port 7860
|
backend/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: AniVerse API
|
| 3 |
+
emoji: 🎌
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
---
|
backend/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (939 Bytes). View file
|
|
|
backend/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (3.14 kB). View file
|
|
|
backend/__pycache__/security.cpython-313.pyc
ADDED
|
Binary file (5.01 kB). View file
|
|
|
backend/aniverse.db
ADDED
|
Binary file (36.9 kB). View file
|
|
|
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0ec6d7f7c3e18b3416b0c55da30d6357a455820b85b3935dd1e630a719e19e
|
| 3 |
+
size 34616104
|
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3dd664db442013f61804af49e0c50db29dd271dfaec5d2d737b62219d2e8ada
|
| 3 |
+
size 100
|
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/index_metadata.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8488b075b81b5e4a24c0d5a07c346f431b467db1d4bc40faf5b7b38bd8955a07
|
| 3 |
+
size 605982
|
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c37ac8716cd37bc387370b60d0552618ac361de76f5c44e32aa5aa17cb2b5dd
|
| 3 |
+
size 82616
|
backend/chroma_db/24e7682a-e5b5-4ac0-89db-2d9ca2508335/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f3973cb0499e959fdc45c3c1900beca3c75e0ed7e67fa93818a84c64a250268
|
| 3 |
+
size 180876
|
backend/chroma_db/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc52dc177e5a53724f27d6656234d10c623ddc1f014125b2738e0bceb777cc67
|
| 3 |
+
size 134754304
|
backend/config.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AniVerse Configuration"""
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
# Paths - support both local development and container deployment
|
| 9 |
+
BASE_DIR = Path(__file__).parent.parent
|
| 10 |
+
BACKEND_DIR = Path(__file__).parent
|
| 11 |
+
|
| 12 |
+
# Check if running in container (dataset will be in /app/dataset)
|
| 13 |
+
if (BACKEND_DIR / "dataset").exists():
|
| 14 |
+
DATASET_PATH = BACKEND_DIR / "dataset" / "anime.csv"
|
| 15 |
+
MANGA_DATASET_PATH = BACKEND_DIR / "dataset" / "manga_data" / "MAL-manga.csv"
|
| 16 |
+
else:
|
| 17 |
+
DATASET_PATH = BASE_DIR / "dataset" / "anime.csv"
|
| 18 |
+
MANGA_DATASET_PATH = BASE_DIR / "dataset" / "manga data" / "MAL-manga.csv"
|
| 19 |
+
|
| 20 |
+
# ChromaDB paths - use environment variables with fallbacks
|
| 21 |
+
CHROMA_DB_PATH = Path(os.getenv("CHROMA_DB_PATH", str(BACKEND_DIR / "chroma_db")))
|
| 22 |
+
MANGA_CHROMA_DB_PATH = Path(os.getenv("MANGA_CHROMA_DB_PATH", str(BACKEND_DIR / "manga_chroma_db")))
|
| 23 |
+
|
| 24 |
+
# API Keys
|
| 25 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 26 |
+
|
| 27 |
+
# Model Settings
|
| 28 |
+
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
| 29 |
+
LLM_MODEL = "llama-3.1-8b-instant" # Fast, free on Groq
|
| 30 |
+
|
| 31 |
+
# API Settings
|
| 32 |
+
JIKAN_BASE_URL = "https://api.jikan.moe/v4"
|
| 33 |
+
JIKAN_RATE_LIMIT = 3 # requests per second
|
backend/data/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Data module
|
backend/data/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (130 Bytes). View file
|
|
|
backend/data/__pycache__/anime_schema.cpython-313.pyc
ADDED
|
Binary file (3.96 kB). View file
|
|
|
backend/data/__pycache__/data_loader.cpython-313.pyc
ADDED
|
Binary file (10.4 kB). View file
|
|
|
backend/data/__pycache__/database.cpython-313.pyc
ADDED
|
Binary file (4.38 kB). View file
|
|
|
backend/data/__pycache__/manga_loader.cpython-313.pyc
ADDED
|
Binary file (7.02 kB). View file
|
|
|
backend/data/__pycache__/manga_schema.cpython-313.pyc
ADDED
|
Binary file (2.65 kB). View file
|
|
|
backend/data/anime_schema.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Anime Data Models"""
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
from typing import Optional
|
| 4 |
+
import ast
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Anime(BaseModel):
|
| 8 |
+
"""Core anime data model"""
|
| 9 |
+
mal_id: int = Field(..., description="MyAnimeList ID")
|
| 10 |
+
title: str
|
| 11 |
+
title_english: Optional[str] = None
|
| 12 |
+
title_japanese: Optional[str] = None
|
| 13 |
+
media_type: str = "tv"
|
| 14 |
+
episodes: Optional[int] = None
|
| 15 |
+
status: str = "unknown"
|
| 16 |
+
score: Optional[float] = None
|
| 17 |
+
scored_by: Optional[int] = None
|
| 18 |
+
rank: Optional[int] = None
|
| 19 |
+
popularity: Optional[int] = None
|
| 20 |
+
favorites: Optional[int] = None
|
| 21 |
+
synopsis: Optional[str] = None
|
| 22 |
+
genres: list[str] = []
|
| 23 |
+
studios: list[str] = []
|
| 24 |
+
source: Optional[str] = None
|
| 25 |
+
rating: Optional[str] = None
|
| 26 |
+
image_url: Optional[str] = None
|
| 27 |
+
start_date: Optional[str] = None
|
| 28 |
+
end_date: Optional[str] = None
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class AnimeSearchResult(BaseModel):
|
| 32 |
+
"""Search result with similarity score"""
|
| 33 |
+
anime: Anime
|
| 34 |
+
similarity: float = Field(..., ge=0, le=1)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class ChatMessage(BaseModel):
|
| 38 |
+
"""Chat message for AI recommendations"""
|
| 39 |
+
role: str = Field(..., pattern="^(user|assistant)$")
|
| 40 |
+
content: str
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class RecommendationRequest(BaseModel):
|
| 44 |
+
"""Request for AI recommendations"""
|
| 45 |
+
query: str
|
| 46 |
+
history: list[ChatMessage] = []
|
| 47 |
+
limit: int = Field(default=10, ge=1, le=50)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class ReviewSummary(BaseModel):
|
| 51 |
+
"""Summarized review data"""
|
| 52 |
+
overall_sentiment: str # positive, negative, mixed
|
| 53 |
+
pros: list[str]
|
| 54 |
+
cons: list[str]
|
| 55 |
+
summary: str
|
| 56 |
+
aspect_scores: dict[str, float] = {} # story, animation, characters, etc.
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def parse_list_field(value: str) -> list[str]:
|
| 60 |
+
"""Parse stringified list from CSV"""
|
| 61 |
+
if not value or value == "[]" or isinstance(value, float):
|
| 62 |
+
return []
|
| 63 |
+
try:
|
| 64 |
+
# Handle Python list string format: "['Action', 'Adventure']"
|
| 65 |
+
return ast.literal_eval(value)
|
| 66 |
+
except (ValueError, SyntaxError):
|
| 67 |
+
# Handle comma-separated format
|
| 68 |
+
return [g.strip() for g in str(value).split(",") if g.strip()]
|
backend/data/data_loader.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Load and process anime dataset"""
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Generator
|
| 5 |
+
import sys
|
| 6 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
+
|
| 8 |
+
from config import DATASET_PATH
|
| 9 |
+
from data.anime_schema import Anime, parse_list_field
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def load_anime_dataset(limit: int = None) -> pd.DataFrame:
|
| 13 |
+
"""Load anime dataset from CSV"""
|
| 14 |
+
print(f"Loading dataset from {DATASET_PATH}...")
|
| 15 |
+
|
| 16 |
+
df = pd.read_csv(DATASET_PATH, nrows=limit)
|
| 17 |
+
|
| 18 |
+
# Rename columns to match our schema
|
| 19 |
+
column_mapping = {
|
| 20 |
+
"id": "mal_id",
|
| 21 |
+
"mean": "score",
|
| 22 |
+
"num_scoring_users": "scored_by",
|
| 23 |
+
"num_favorites": "favorites",
|
| 24 |
+
"main_picture_medium": "image_url",
|
| 25 |
+
"alternative_titles_en": "title_english",
|
| 26 |
+
"alternative_titles_ja": "title_japanese",
|
| 27 |
+
}
|
| 28 |
+
df = df.rename(columns=column_mapping)
|
| 29 |
+
|
| 30 |
+
print(f"Loaded {len(df)} anime entries")
|
| 31 |
+
return df
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def parse_anime_row(row: pd.Series) -> Anime:
|
| 35 |
+
"""Convert DataFrame row to Anime model"""
|
| 36 |
+
return Anime(
|
| 37 |
+
mal_id=int(row["mal_id"]),
|
| 38 |
+
title=str(row.get("title", "Unknown")),
|
| 39 |
+
title_english=row.get("title_english") if pd.notna(row.get("title_english")) else None,
|
| 40 |
+
title_japanese=row.get("title_japanese") if pd.notna(row.get("title_japanese")) else None,
|
| 41 |
+
media_type=str(row.get("media_type", "unknown")),
|
| 42 |
+
episodes=int(row["num_episodes"]) if pd.notna(row.get("num_episodes")) and row.get("num_episodes") != 0 else None,
|
| 43 |
+
status=str(row.get("status", "unknown")),
|
| 44 |
+
score=float(row["score"]) if pd.notna(row.get("score")) else None,
|
| 45 |
+
scored_by=int(row["scored_by"]) if pd.notna(row.get("scored_by")) else None,
|
| 46 |
+
rank=int(row["rank"]) if pd.notna(row.get("rank")) else None,
|
| 47 |
+
popularity=int(row["popularity"]) if pd.notna(row.get("popularity")) else None,
|
| 48 |
+
favorites=int(row["favorites"]) if pd.notna(row.get("favorites")) else None,
|
| 49 |
+
synopsis=str(row.get("synopsis", "")) if pd.notna(row.get("synopsis")) else None,
|
| 50 |
+
genres=parse_list_field(row.get("genres", "[]")),
|
| 51 |
+
studios=parse_list_field(row.get("studios", "[]")),
|
| 52 |
+
source=str(row.get("source")) if pd.notna(row.get("source")) else None,
|
| 53 |
+
rating=str(row.get("rating")) if pd.notna(row.get("rating")) else None,
|
| 54 |
+
image_url=str(row.get("image_url")) if pd.notna(row.get("image_url")) else None,
|
| 55 |
+
start_date=str(row.get("start_date")) if pd.notna(row.get("start_date")) else None,
|
| 56 |
+
end_date=str(row.get("end_date")) if pd.notna(row.get("end_date")) else None,
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def iter_anime(df: pd.DataFrame) -> Generator[Anime, None, None]:
|
| 61 |
+
"""Iterate over anime entries as Pydantic models"""
|
| 62 |
+
for _, row in df.iterrows():
|
| 63 |
+
try:
|
| 64 |
+
yield parse_anime_row(row)
|
| 65 |
+
except Exception as e:
|
| 66 |
+
print(f"Error parsing row {row.get('mal_id', 'unknown')}: {e}")
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def create_embedding_text(anime: Anime) -> str:
|
| 71 |
+
"""Create text for embedding generation"""
|
| 72 |
+
parts = [anime.title]
|
| 73 |
+
|
| 74 |
+
if anime.title_english and anime.title_english != anime.title:
|
| 75 |
+
parts.append(anime.title_english)
|
| 76 |
+
|
| 77 |
+
if anime.genres:
|
| 78 |
+
parts.append(f"Genres: {', '.join(anime.genres)}")
|
| 79 |
+
|
| 80 |
+
if anime.synopsis:
|
| 81 |
+
# Truncate synopsis to prevent overly long embeddings
|
| 82 |
+
synopsis = anime.synopsis[:1000]
|
| 83 |
+
parts.append(synopsis)
|
| 84 |
+
|
| 85 |
+
# Extract scene keywords for better scene-based search
|
| 86 |
+
scene_keywords = extract_scene_keywords(synopsis, anime.genres or [])
|
| 87 |
+
if scene_keywords:
|
| 88 |
+
parts.append(f"Scenes and tropes: {', '.join(scene_keywords)}")
|
| 89 |
+
|
| 90 |
+
return " | ".join(parts)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# Scene/trope detection patterns
|
| 94 |
+
SCENE_PATTERNS = {
|
| 95 |
+
# Romantic scenes
|
| 96 |
+
"confession": ["confess", "confession", "i love you", "feelings for", "admit feelings"],
|
| 97 |
+
"rooftop scene": ["rooftop", "on the roof", "school rooftop"],
|
| 98 |
+
"beach episode": ["beach", "swimsuit", "ocean", "summer vacation"],
|
| 99 |
+
"festival date": ["festival", "fireworks", "yukata", "summer festival"],
|
| 100 |
+
"accidental kiss": ["accidental", "lips touched", "fell on"],
|
| 101 |
+
|
| 102 |
+
# Action scenes
|
| 103 |
+
"training arc": ["training", "train harder", "become stronger", "special training"],
|
| 104 |
+
"tournament arc": ["tournament", "competition", "championship", "finals"],
|
| 105 |
+
"final battle": ["final battle", "last fight", "ultimate showdown", "final boss"],
|
| 106 |
+
"power awakening": ["awakens", "hidden power", "true power", "unleash"],
|
| 107 |
+
"sacrifice": ["sacrifice", "gave their life", "protect everyone", "died saving"],
|
| 108 |
+
|
| 109 |
+
# Emotional scenes
|
| 110 |
+
"tearful goodbye": ["goodbye", "farewell", "parting", "separation"],
|
| 111 |
+
"death scene": ["death", "died", "killed", "passed away", "funeral"],
|
| 112 |
+
"reunion": ["reunite", "reunion", "meet again", "found each other"],
|
| 113 |
+
"flashback": ["flashback", "memories", "past", "childhood"],
|
| 114 |
+
"redemption arc": ["redemption", "atone", "make amends", "change their ways"],
|
| 115 |
+
|
| 116 |
+
# Character tropes
|
| 117 |
+
"overpowered protagonist": ["overpowered", "strongest", "unbeatable", "one punch", "no match"],
|
| 118 |
+
"hidden identity": ["secret identity", "hiding", "disguise", "true self"],
|
| 119 |
+
"underdog story": ["underdog", "weakest", "looked down upon", "prove them wrong"],
|
| 120 |
+
"transfer student": ["transfer student", "new student", "just arrived"],
|
| 121 |
+
"chosen one": ["chosen", "prophecy", "destined", "fate"],
|
| 122 |
+
|
| 123 |
+
# Setting/atmosphere
|
| 124 |
+
"post-apocalyptic": ["apocalypse", "post-apocalyptic", "destroyed world", "ruins"],
|
| 125 |
+
"isekai": ["another world", "transported", "reincarnated", "summoned to"],
|
| 126 |
+
"time loop": ["time loop", "repeating", "stuck in time", "groundhog"],
|
| 127 |
+
"school setting": ["high school", "academy", "school", "classroom"],
|
| 128 |
+
"dystopian": ["dystopia", "oppressive", "government control", "rebellion"],
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def extract_scene_keywords(synopsis: str, genres: list[str]) -> list[str]:
|
| 133 |
+
"""Extract scene/trope keywords from synopsis for better search"""
|
| 134 |
+
if not synopsis:
|
| 135 |
+
return []
|
| 136 |
+
|
| 137 |
+
synopsis_lower = synopsis.lower()
|
| 138 |
+
detected = []
|
| 139 |
+
|
| 140 |
+
for scene_name, patterns in SCENE_PATTERNS.items():
|
| 141 |
+
for pattern in patterns:
|
| 142 |
+
if pattern in synopsis_lower:
|
| 143 |
+
detected.append(scene_name)
|
| 144 |
+
break
|
| 145 |
+
|
| 146 |
+
# Add genre-based common tropes
|
| 147 |
+
genre_tropes = {
|
| 148 |
+
"Romance": ["love triangle", "slow burn romance"],
|
| 149 |
+
"Action": ["battle scenes", "fight choreography"],
|
| 150 |
+
"Comedy": ["comedic moments", "slapstick"],
|
| 151 |
+
"Drama": ["emotional moments", "character development"],
|
| 152 |
+
"Horror": ["scary scenes", "tension building"],
|
| 153 |
+
"Sports": ["match scenes", "team dynamics"],
|
| 154 |
+
"Music": ["performance scenes", "concert"],
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
for genre in genres:
|
| 158 |
+
if genre in genre_tropes:
|
| 159 |
+
detected.extend(genre_tropes[genre])
|
| 160 |
+
|
| 161 |
+
return list(set(detected))[:10] # Limit to 10 keywords
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
if __name__ == "__main__":
|
| 165 |
+
# Test loading
|
| 166 |
+
df = load_anime_dataset(limit=10)
|
| 167 |
+
for anime in iter_anime(df):
|
| 168 |
+
print(f"{anime.mal_id}: {anime.title} ({anime.score}) - {anime.genres}")
|
| 169 |
+
print(f" Embedding text: {create_embedding_text(anime)[:150]}...")
|
| 170 |
+
print()
|
backend/data/database.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Database setup and models"""
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, ForeignKey, Enum as SQLEnum
|
| 5 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 6 |
+
from sqlalchemy.orm import sessionmaker, relationship
|
| 7 |
+
import enum
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Database path
|
| 11 |
+
DB_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "aniverse.db")
|
| 12 |
+
DATABASE_URL = f"sqlite:///{DB_PATH}"
|
| 13 |
+
|
| 14 |
+
# Create engine
|
| 15 |
+
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
|
| 16 |
+
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 17 |
+
Base = declarative_base()
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class AnimeStatus(enum.Enum):
|
| 21 |
+
"""User's status for an anime"""
|
| 22 |
+
watching = "watching"
|
| 23 |
+
completed = "completed"
|
| 24 |
+
planned = "planned"
|
| 25 |
+
dropped = "dropped"
|
| 26 |
+
on_hold = "on_hold"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class User(Base):
|
| 30 |
+
"""User account model"""
|
| 31 |
+
__tablename__ = "users"
|
| 32 |
+
|
| 33 |
+
id = Column(Integer, primary_key=True, index=True)
|
| 34 |
+
email = Column(String, unique=True, index=True, nullable=False)
|
| 35 |
+
username = Column(String, unique=True, index=True, nullable=False)
|
| 36 |
+
password_hash = Column(String, nullable=False)
|
| 37 |
+
created_at = Column(DateTime, default=datetime.utcnow)
|
| 38 |
+
|
| 39 |
+
# Relationships
|
| 40 |
+
anime_list = relationship("UserAnime", back_populates="user")
|
| 41 |
+
manga_list = relationship("UserManga", back_populates="user")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class UserAnime(Base):
|
| 45 |
+
"""User's anime list entry"""
|
| 46 |
+
__tablename__ = "user_anime"
|
| 47 |
+
|
| 48 |
+
id = Column(Integer, primary_key=True, index=True)
|
| 49 |
+
user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
|
| 50 |
+
anime_id = Column(Integer, nullable=False) # MAL ID
|
| 51 |
+
status = Column(SQLEnum(AnimeStatus), default=AnimeStatus.planned)
|
| 52 |
+
rating = Column(Float, nullable=True) # 1-10 scale
|
| 53 |
+
is_favorite = Column(Integer, default=0) # SQLite boolean
|
| 54 |
+
added_at = Column(DateTime, default=datetime.utcnow)
|
| 55 |
+
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
| 56 |
+
|
| 57 |
+
# Relationships
|
| 58 |
+
user = relationship("User", back_populates="anime_list")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class UserManga(Base):
|
| 62 |
+
"""User's manga list entry"""
|
| 63 |
+
__tablename__ = "user_manga"
|
| 64 |
+
|
| 65 |
+
id = Column(Integer, primary_key=True, index=True)
|
| 66 |
+
user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
|
| 67 |
+
manga_id = Column(Integer, nullable=False) # MAL ID
|
| 68 |
+
status = Column(SQLEnum(AnimeStatus), default=AnimeStatus.planned) # Reuse status enum
|
| 69 |
+
rating = Column(Float, nullable=True) # 1-10 scale
|
| 70 |
+
is_favorite = Column(Integer, default=0) # SQLite boolean
|
| 71 |
+
added_at = Column(DateTime, default=datetime.utcnow)
|
| 72 |
+
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
| 73 |
+
|
| 74 |
+
# Relationships
|
| 75 |
+
user = relationship("User", back_populates="manga_list")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def init_db():
|
| 79 |
+
"""Initialize database tables"""
|
| 80 |
+
Base.metadata.create_all(bind=engine)
|
| 81 |
+
print(f"Database initialized at {DB_PATH}")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def get_db():
|
| 85 |
+
"""Get database session"""
|
| 86 |
+
db = SessionLocal()
|
| 87 |
+
try:
|
| 88 |
+
yield db
|
| 89 |
+
finally:
|
| 90 |
+
db.close()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == "__main__":
|
| 94 |
+
init_db()
|
| 95 |
+
print("Database tables created successfully!")
|
backend/data/manga_loader.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Load and process manga dataset"""
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Generator
|
| 5 |
+
import sys
|
| 6 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
+
|
| 8 |
+
from config import MANGA_DATASET_PATH
|
| 9 |
+
from data.manga_schema import Manga, parse_list_field
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def load_manga_dataset(limit: int = None) -> pd.DataFrame:
|
| 13 |
+
"""Load manga dataset from CSV"""
|
| 14 |
+
print(f"Loading manga dataset from {MANGA_DATASET_PATH}...")
|
| 15 |
+
|
| 16 |
+
df = pd.read_csv(MANGA_DATASET_PATH, nrows=limit)
|
| 17 |
+
|
| 18 |
+
# Clean up column names
|
| 19 |
+
df.columns = df.columns.str.strip()
|
| 20 |
+
|
| 21 |
+
print(f"Loaded {len(df)} manga entries")
|
| 22 |
+
print(f"Columns: {df.columns.tolist()}")
|
| 23 |
+
return df
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def parse_manga_row(row: pd.Series) -> Manga:
|
| 27 |
+
"""Convert DataFrame row to Manga model"""
|
| 28 |
+
# Extract mal_id from URL if available
|
| 29 |
+
mal_id = None
|
| 30 |
+
if pd.notna(row.get("page_url")):
|
| 31 |
+
try:
|
| 32 |
+
# URL format: https://myanimelist.net/manga/ID/title
|
| 33 |
+
url = str(row["page_url"])
|
| 34 |
+
parts = url.split("/manga/")
|
| 35 |
+
if len(parts) > 1:
|
| 36 |
+
mal_id = int(parts[1].split("/")[0])
|
| 37 |
+
except (ValueError, IndexError):
|
| 38 |
+
pass
|
| 39 |
+
|
| 40 |
+
if mal_id is None:
|
| 41 |
+
# Use index or unnamed column
|
| 42 |
+
mal_id = int(row.get("Unnamed: 0", row.name)) if pd.notna(row.get("Unnamed: 0")) else row.name
|
| 43 |
+
|
| 44 |
+
# Parse volumes
|
| 45 |
+
volumes = None
|
| 46 |
+
if pd.notna(row.get("Volumes")):
|
| 47 |
+
try:
|
| 48 |
+
vol_str = str(row["Volumes"]).strip()
|
| 49 |
+
if vol_str.isdigit():
|
| 50 |
+
volumes = int(vol_str)
|
| 51 |
+
except ValueError:
|
| 52 |
+
pass
|
| 53 |
+
|
| 54 |
+
# Parse score
|
| 55 |
+
score = None
|
| 56 |
+
if pd.notna(row.get("Score")):
|
| 57 |
+
try:
|
| 58 |
+
score = float(row["Score"])
|
| 59 |
+
except (ValueError, TypeError):
|
| 60 |
+
pass
|
| 61 |
+
|
| 62 |
+
# Parse members
|
| 63 |
+
members = None
|
| 64 |
+
if pd.notna(row.get("Members")):
|
| 65 |
+
try:
|
| 66 |
+
members = int(str(row["Members"]).replace(",", ""))
|
| 67 |
+
except (ValueError, TypeError):
|
| 68 |
+
pass
|
| 69 |
+
|
| 70 |
+
# Parse rank
|
| 71 |
+
rank = None
|
| 72 |
+
if pd.notna(row.get("Rank")):
|
| 73 |
+
try:
|
| 74 |
+
rank = int(row["Rank"])
|
| 75 |
+
except (ValueError, TypeError):
|
| 76 |
+
pass
|
| 77 |
+
|
| 78 |
+
return Manga(
|
| 79 |
+
mal_id=mal_id,
|
| 80 |
+
title=str(row.get("Title", "Unknown")).strip(),
|
| 81 |
+
media_type=str(row.get("Type", "Manga")).strip().lower() if pd.notna(row.get("Type")) else "manga",
|
| 82 |
+
volumes=volumes,
|
| 83 |
+
score=score,
|
| 84 |
+
rank=rank,
|
| 85 |
+
members=members,
|
| 86 |
+
published=str(row.get("Published")) if pd.notna(row.get("Published")) else None,
|
| 87 |
+
genres=parse_list_field(row.get("Genres", "[]")),
|
| 88 |
+
authors=parse_list_field(row.get("Authors", "[]")),
|
| 89 |
+
image_url=str(row.get("image_url")) if pd.notna(row.get("image_url")) else None,
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def iter_manga(df: pd.DataFrame) -> Generator[Manga, None, None]:
    """Yield validated Manga models for each dataframe row, skipping bad rows."""
    for _, record in df.iterrows():
        try:
            yield parse_manga_row(record)
        except Exception as e:
            # One malformed row must not abort the whole load; report and move on.
            print(f"Error parsing manga row {record.get('Title', 'unknown')}: {e}")
            continue
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def create_manga_embedding_text(manga: Manga) -> str:
    """Build the pipe-delimited text that gets embedded for a manga entry."""
    segments = [manga.title]

    if manga.genres:
        segments.append(f"Genres: {', '.join(manga.genres)}")

    if manga.media_type:
        segments.append(f"Type: {manga.media_type}")

    if manga.authors:
        # Cap at three authors to keep the embedding text compact.
        segments.append(f"Authors: {', '.join(manga.authors[:3])}")

    if manga.synopsis:
        # Truncate long synopses; the first 1000 chars carry the semantic signal.
        segments.append(manga.synopsis[:1000])

    return " | ".join(segments)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
    # Smoke test: load a small sample and print parsed fields for manual inspection.
    df = load_manga_dataset(limit=10)
    for manga in iter_manga(df):
        print(f"{manga.mal_id}: {manga.title} (Score: {manga.score}) - {manga.genres}")
        print(f" Type: {manga.media_type}, Volumes: {manga.volumes}")
        print()
|
backend/data/manga_schema.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Manga data schema"""
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional
|
| 4 |
+
import ast
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Manga(BaseModel):
    """Manga entry with MAL-style fields"""
    mal_id: int  # MyAnimeList ID (loader may fall back to the CSV row index)
    title: str
    title_english: Optional[str] = None
    media_type: str = "manga"  # manga, manhwa, manhua, novel, light_novel
    volumes: Optional[int] = None
    chapters: Optional[int] = None
    status: Optional[str] = None  # publishing, finished
    score: Optional[float] = None  # community score, 0-10
    scored_by: Optional[int] = None  # number of users who scored
    rank: Optional[int] = None  # ranking position (lower is better)
    popularity: Optional[int] = None
    members: Optional[int] = None  # users with this entry on their list
    favorites: Optional[int] = None
    synopsis: Optional[str] = None
    genres: list[str] = []  # pydantic copies mutable defaults per-instance
    authors: list[str] = []
    image_url: Optional[str] = None
    published: Optional[str] = None  # raw publication date range string from the CSV
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def parse_list_field(value) -> list[str]:
    """Parse a list-valued CSV cell into a list of strings.

    Accepts values that are already lists, Python-literal list strings
    (e.g. "['Action', 'Drama']"), comma-separated strings, or a plain
    single-value string.  NaN / None / empty inputs yield an empty list.

    Args:
        value: Raw cell value from the CSV (any type).

    Returns:
        List of non-empty strings; [] when nothing usable is present.
    """
    # NaN never equals itself: this catches float('nan') and numpy.nan
    # robustly instead of comparing against the string 'nan'.
    if value is None or (isinstance(value, float) and value != value) or not value:
        return []

    if isinstance(value, list):
        return value

    if isinstance(value, str):
        value = value.strip()
        if value.startswith('['):
            try:
                parsed = ast.literal_eval(value)
                return [str(item) for item in parsed if item]
            except (ValueError, SyntaxError):
                pass  # not a valid literal; fall through to plain-string handling

        # Try comma-separated
        if ',' in value:
            return [v.strip() for v in value.split(',') if v.strip()]

        return [value] if value else []

    # Unsupported scalar type (int, dict, ...): treat as no data.
    return []
|
backend/embeddings/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Embeddings module
|
backend/embeddings/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (136 Bytes). View file
|
|
|
backend/embeddings/__pycache__/chroma_store.cpython-313.pyc
ADDED
|
Binary file (5.84 kB). View file
|
|
|
backend/embeddings/__pycache__/manga_chroma_store.cpython-313.pyc
ADDED
|
Binary file (5.88 kB). View file
|
|
|
backend/embeddings/__pycache__/search_utils.cpython-313.pyc
ADDED
|
Binary file (4.81 kB). View file
|
|
|
backend/embeddings/build_embeddings.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Build anime embeddings and populate vector store"""
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 6 |
+
|
| 7 |
+
from data.data_loader import load_anime_dataset, iter_anime, create_embedding_text
|
| 8 |
+
from embeddings.chroma_store import get_vector_store
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def build_embeddings(limit: int = None, batch_size: int = 100):
    """Embed every anime entry with a usable synopsis and persist to ChromaDB."""
    banner = "=" * 50
    print(banner)
    print("AniVerse Embedding Builder")
    print(banner)

    # Load the dataset (optionally capped for quick runs).
    frame = load_anime_dataset(limit=limit)

    # Open the persistent vector store and report what is already there.
    store = get_vector_store()
    print(f"Existing entries in vector store: {store.get_count()}")

    # Accumulate parallel lists for one batched upsert.
    anime_ids = []
    documents = []
    metas = []

    print("Processing anime entries...")
    for anime in iter_anime(frame):
        # Entries lacking a meaningful synopsis embed poorly; drop them.
        if not anime.synopsis or len(anime.synopsis) < 20:
            continue

        anime_ids.append(anime.mal_id)
        documents.append(create_embedding_text(anime))
        metas.append({
            "title": anime.title,
            "score": anime.score or 0,
            "genres": ", ".join(anime.genres) if anime.genres else "",
            "media_type": anime.media_type,
            "status": anime.status,
            "image_url": anime.image_url or "",
        })

    print(f"Prepared {len(anime_ids)} anime entries for embedding")

    print("Generating embeddings and storing in ChromaDB...")
    store.add_batch(anime_ids, documents, metas, batch_size=batch_size)

    print(banner)
    print(f"Complete! Vector store now has {store.get_count()} entries")
    print(banner)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
    # CLI entry point: --limit caps how many rows are embedded (useful while
    # testing); --batch-size controls the upsert chunk size.
    import argparse

    parser = argparse.ArgumentParser(description="Build anime embeddings")
    parser.add_argument("--limit", type=int, default=None, help="Limit number of entries to process")
    parser.add_argument("--batch-size", type=int, default=100, help="Batch size for embedding generation")

    args = parser.parse_args()
    build_embeddings(limit=args.limit, batch_size=args.batch_size)
|
backend/embeddings/build_manga_embeddings.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Build manga embeddings and store in ChromaDB"""
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 5 |
+
|
| 6 |
+
from data.manga_loader import load_manga_dataset, iter_manga, create_manga_embedding_text
|
| 7 |
+
from embeddings.manga_chroma_store import MangaVectorStore
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def build_manga_embeddings(limit: int = None):
    """Embed the manga dataset and persist it to the manga ChromaDB collection."""
    divider = "=" * 50
    print(divider)
    print("Building Manga Embeddings")
    print(divider)

    # Load the dataset (optionally capped for quick runs).
    frame = load_manga_dataset(limit=limit)

    # Open the persistent manga vector store.
    store = MangaVectorStore()

    # Accumulate parallel lists for one batched upsert.
    entry_ids = []
    entry_texts = []
    entry_metas = []

    print("\nProcessing manga entries...")
    for manga in iter_manga(frame):
        entry_ids.append(manga.mal_id)
        entry_texts.append(create_manga_embedding_text(manga))
        entry_metas.append({
            "title": manga.title,
            "media_type": manga.media_type or "manga",
            "score": manga.score or 0,
            "rank": manga.rank or 0,
            "members": manga.members or 0,
            "volumes": manga.volumes or 0,
            "genres": ", ".join(manga.genres) if manga.genres else "",
            "authors": ", ".join(manga.authors[:3]) if manga.authors else "",
            "image_url": manga.image_url or "",
            "published": manga.published or "",
        })

    print(f"\nAdding {len(entry_ids)} manga to vector store...")
    store.add_batch(entry_ids, entry_texts, entry_metas, batch_size=100)

    print(f"\n✓ Successfully indexed {store.get_count()} manga entries!")
    print(divider)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
if __name__ == "__main__":
    # CLI entry point: --limit caps how many rows are embedded (useful while testing).
    import argparse
    parser = argparse.ArgumentParser(description="Build manga embeddings")
    parser.add_argument("--limit", type=int, help="Limit number of entries")
    args = parser.parse_args()

    build_manga_embeddings(limit=args.limit)
|
backend/embeddings/chroma_store.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ChromaDB Vector Store for Anime Similarity Search"""
|
| 2 |
+
import chromadb
|
| 3 |
+
from chromadb.config import Settings
|
| 4 |
+
from typing import Optional
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 9 |
+
from config import CHROMA_DB_PATH, EMBEDDING_MODEL
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class AnimeVectorStore:
    """Vector database for anime semantic search.

    Wraps a persistent ChromaDB collection named "anime" (cosine distance);
    embeddings are generated automatically by a SentenceTransformer model.
    """

    def __init__(self, persist_directory: str = None):
        # Fall back to the configured path when no explicit directory is given.
        self.persist_dir = persist_directory or str(CHROMA_DB_PATH)

        try:
            # Initialize ChromaDB client with telemetry disabled
            self.client = chromadb.PersistentClient(
                path=self.persist_dir,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )

            # Use sentence-transformers for embeddings (more compatible than onnxruntime)
            from chromadb.utils import embedding_functions
            self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=EMBEDDING_MODEL
            )

            # Get or create anime collection with embedding function
            self.collection = self.client.get_or_create_collection(
                name="anime",
                metadata={"hnsw:space": "cosine"},
                embedding_function=self.embedding_fn
            )

            print(f"Vector store initialized at {self.persist_dir}")
            print(f"Collection count: {self.collection.count()}")
        except Exception as e:
            # Print the full traceback before re-raising so startup failures are debuggable.
            print(f"ERROR initializing vector store: {e}")
            import traceback
            traceback.print_exc()
            raise

    def add_anime(
        self,
        mal_id: int,
        embedding_text: str,
        metadata: dict
    ) -> None:
        """Add or update anime entry in vector store.

        Args:
            mal_id: MAL ID; stringified to serve as the ChromaDB document id.
            embedding_text: Text the collection embeds for similarity search.
            metadata: Per-entry metadata stored alongside the embedding.
        """
        # Upsert to collection (embeddings auto-generated)
        self.collection.upsert(
            ids=[str(mal_id)],
            documents=[embedding_text],
            metadatas=[metadata]
        )

    def add_batch(
        self,
        ids: list[int],
        texts: list[str],
        metadatas: list[dict],
        batch_size: int = 100
    ) -> None:
        """Add multiple anime entries in batches.

        `ids`, `texts` and `metadatas` must be parallel lists of equal length.
        """
        total = len(ids)
        for i in range(0, total, batch_size):
            batch_ids = [str(id_) for id_ in ids[i:i+batch_size]]
            batch_texts = texts[i:i+batch_size]
            batch_meta = metadatas[i:i+batch_size]

            self.collection.upsert(
                ids=batch_ids,
                documents=batch_texts,
                metadatas=batch_meta
            )

            # Progress indicator; min() keeps the final (partial) batch count accurate.
            print(f" Added {min(i+batch_size, total)}/{total} entries...")

    def search(
        self,
        query: str,
        n_results: int = 10,
        where: Optional[dict] = None
    ) -> list[dict]:
        """Search for similar anime by text query.

        Returns:
            List of dicts with mal_id, metadata, document and similarity,
            where similarity = 1 - cosine distance.
        """
        # Query ChromaDB (embedding auto-generated from query)
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
            where=where,
            include=["metadatas", "documents", "distances"]
        )

        # Format results
        formatted = []
        for i, mal_id in enumerate(results["ids"][0]):
            formatted.append({
                "mal_id": int(mal_id),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]  # Convert distance to similarity
            })

        return formatted

    def search_similar(
        self,
        mal_id: int,
        n_results: int = 10
    ) -> list[dict]:
        """Find anime similar to a given anime by MAL ID.

        Returns an empty list when the id is not present in the collection.
        """
        # Get the anime's document
        result = self.collection.get(
            ids=[str(mal_id)],
            include=["documents"]
        )

        if not result["documents"]:
            return []

        # Query with that document
        results = self.collection.query(
            query_texts=result["documents"],
            n_results=n_results + 1,  # +1 to exclude self
            include=["metadatas", "documents", "distances"]
        )

        # Format and exclude self
        formatted = []
        for i, id_ in enumerate(results["ids"][0]):
            if int(id_) == mal_id:
                continue
            formatted.append({
                "mal_id": int(id_),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted[:n_results]

    def get_count(self) -> int:
        """Get total number of entries in the collection"""
        return self.collection.count()
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Singleton instance (lazily created so importing the module stays cheap)
_store: Optional[AnimeVectorStore] = None


def get_vector_store() -> AnimeVectorStore:
    """Get or create vector store instance.

    Lazy singleton: the ChromaDB client is opened on first call only.
    """
    global _store
    if _store is None:
        _store = AnimeVectorStore()
    return _store
|
backend/embeddings/manga_chroma_store.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ChromaDB Vector Store for Manga Similarity Search"""
|
| 2 |
+
import chromadb
|
| 3 |
+
from chromadb.config import Settings
|
| 4 |
+
from chromadb.utils import embedding_functions
|
| 5 |
+
from typing import Optional
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 10 |
+
from config import MANGA_CHROMA_DB_PATH, EMBEDDING_MODEL
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class MangaVectorStore:
    """Vector database for manga semantic search.

    Wraps a persistent ChromaDB collection named "manga" (cosine distance);
    embeddings are generated automatically by a SentenceTransformer model.
    """

    def __init__(self, persist_directory: str = None):
        # Fall back to the configured path when no explicit directory is given.
        self.persist_dir = persist_directory or str(MANGA_CHROMA_DB_PATH)

        try:
            # Initialize ChromaDB client
            self.client = chromadb.PersistentClient(
                path=self.persist_dir,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )

            # Use sentence-transformers for embeddings (more compatible)
            self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=EMBEDDING_MODEL
            )

            # Get or create manga collection with embedding function
            self.collection = self.client.get_or_create_collection(
                name="manga",
                metadata={"hnsw:space": "cosine"},
                embedding_function=self.embedding_fn
            )

            print(f"Manga vector store initialized at {self.persist_dir}")
            print(f"Manga collection count: {self.collection.count()}")
        except Exception as e:
            # Print the full traceback before re-raising so startup failures are debuggable.
            print(f"ERROR initializing manga vector store: {e}")
            import traceback
            traceback.print_exc()
            raise

    def add_manga(
        self,
        mal_id: int,
        embedding_text: str,
        metadata: dict
    ) -> None:
        """Add or update manga entry in vector store.

        Args:
            mal_id: MAL ID; stringified to serve as the ChromaDB document id.
            embedding_text: Text the collection embeds for similarity search.
            metadata: Per-entry metadata stored alongside the embedding.
        """
        self.collection.upsert(
            ids=[str(mal_id)],
            documents=[embedding_text],
            metadatas=[metadata]
        )

    def add_batch(
        self,
        ids: list[int],
        texts: list[str],
        metadatas: list[dict],
        batch_size: int = 100
    ) -> None:
        """Add multiple manga entries in batches.

        `ids`, `texts` and `metadatas` must be parallel lists of equal length.
        """
        total = len(ids)
        for i in range(0, total, batch_size):
            batch_ids = [str(id_) for id_ in ids[i:i+batch_size]]
            batch_texts = texts[i:i+batch_size]
            batch_meta = metadatas[i:i+batch_size]

            self.collection.upsert(
                ids=batch_ids,
                documents=batch_texts,
                metadatas=batch_meta
            )

            # Progress indicator; min() keeps the final (partial) batch count accurate.
            print(f" Added {min(i+batch_size, total)}/{total} manga entries...")

    def search(
        self,
        query: str,
        n_results: int = 10,
        where: Optional[dict] = None
    ) -> list[dict]:
        """Search for similar manga by text query.

        Returns:
            List of dicts with mal_id, metadata, document and similarity,
            where similarity = 1 - cosine distance.
        """
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results,
            where=where,
            include=["metadatas", "documents", "distances"]
        )

        formatted = []
        for i, mal_id in enumerate(results["ids"][0]):
            formatted.append({
                "mal_id": int(mal_id),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted

    def search_similar(
        self,
        mal_id: int,
        n_results: int = 10
    ) -> list[dict]:
        """Find manga similar to a given manga by MAL ID.

        Returns an empty list when the id is not present in the collection.
        """
        result = self.collection.get(
            ids=[str(mal_id)],
            include=["documents"]
        )

        if not result["documents"]:
            return []

        # Ask for one extra result so the entry itself can be excluded below.
        results = self.collection.query(
            query_texts=result["documents"],
            n_results=n_results + 1,
            include=["metadatas", "documents", "distances"]
        )

        formatted = []
        for i, id_ in enumerate(results["ids"][0]):
            if int(id_) == mal_id:
                continue
            formatted.append({
                "mal_id": int(id_),
                "metadata": results["metadatas"][0][i],
                "document": results["documents"][0][i],
                "similarity": 1 - results["distances"][0][i]
            })

        return formatted[:n_results]

    def get_count(self) -> int:
        """Get total number of entries in the collection"""
        return self.collection.count()
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# Singleton instance (lazily created so importing the module stays cheap)
_manga_store: Optional[MangaVectorStore] = None


def get_manga_vector_store() -> MangaVectorStore:
    """Get or create manga vector store instance.

    Lazy singleton: the ChromaDB client is opened on first call only.
    """
    global _manga_store
    if _manga_store is None:
        _manga_store = MangaVectorStore()
    return _manga_store
|
backend/embeddings/search_utils.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Search utilities for improved ranking and filtering"""
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def calculate_combined_score(
    similarity: float,
    anime_score: Optional[float],
    popularity: Optional[int] = None,
    weight_similarity: float = 0.6,
    weight_anime_score: float = 0.3,
    weight_popularity: float = 0.1
) -> float:
    """
    Calculate a combined ranking score.

    Args:
        similarity: Vector similarity (0-1)
        anime_score: MAL score (0-10); None is treated as 0
        popularity: Popularity rank (lower is better); None uses a neutral 0.5
        weight_similarity: Weight of the similarity term
        weight_anime_score: Weight of the normalized MAL-score term
        weight_popularity: Weight of the normalized popularity term

    Returns:
        Combined score (0-1), rounded to 4 decimals
    """
    # Normalize anime score to 0-1
    normalized_score = (anime_score or 0) / 10

    # Normalize popularity (inverse, since lower rank = more popular)
    normalized_pop = 0.5  # Default if not available
    if popularity and popularity > 0:
        # Map rank 1-1000 to 1-0.5, rank > 1000 to 0.5-0.1
        if popularity <= 1000:
            normalized_pop = 1 - (popularity / 2000)
        else:
            normalized_pop = max(0.1, 0.5 - (popularity - 1000) / 20000)

    combined = (
        weight_similarity * similarity +
        weight_anime_score * normalized_score +
        weight_popularity * normalized_pop
    )

    return round(combined, 4)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def rerank_results(results: list[dict], limit: int = 15) -> list[dict]:
    """
    Rerank search results using combined scoring.

    Each result dict gains a "combined_score" key (added in place); the
    returned list is ordered best-first and truncated to `limit`.

    Args:
        results: List of search results with metadata
        limit: Max results to return

    Returns:
        Reranked and limited results
    """
    for entry in results:
        meta = entry.get("metadata", {})
        entry["combined_score"] = calculate_combined_score(
            similarity=entry.get("similarity", 0),
            anime_score=meta.get("score", 0),
            popularity=meta.get("popularity"),
        )

    # Highest combined score first, then cut to the requested size.
    ordered = sorted(results, key=lambda item: item["combined_score"], reverse=True)
    return ordered[:limit]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def build_genre_filter(genres: list[str]) -> dict:
    """Build ChromaDB where filter for genres.

    Returns None when no genres are given, so callers can pass the result
    straight through as `where=` to a collection query.

    NOTE(review): ChromaDB documents `$contains` as a *where_document*
    operator, not a metadata `where` operator — confirm the installed
    version accepts it here, otherwise these filters may be rejected.
    """
    if not genres:
        return None

    # ChromaDB uses $contains for partial string match
    if len(genres) == 1:
        return {"genres": {"$contains": genres[0]}}

    # Multiple genres: any match
    return {"$or": [{"genres": {"$contains": g}} for g in genres]}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def extract_keywords(query: str) -> list[str]:
    """Extract important keywords from a search query.

    Lowercases the query, strips surrounding punctuation from each word,
    and drops stop words and empty tokens (the previous version both
    stripped each word twice and kept empty strings from pure-punctuation
    tokens like "!!!").

    Args:
        query: Free-text user query.

    Returns:
        Lowercased keywords in original order.
    """
    # Common filler words that carry no search signal.
    stop_words = {
        "anime", "like", "similar", "to", "with", "the", "a", "an", "and", "or",
        "that", "has", "have", "good", "best", "top", "show", "series", "want",
        "looking", "for", "something", "recommend", "me", "please", "i", "my"
    }

    keywords = []
    for word in query.lower().split():
        cleaned = word.strip(",.!?")
        if cleaned and cleaned not in stop_words:
            keywords.append(cleaned)

    return keywords
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Genre keyword mappings for better matching
|
| 99 |
+
# Genre keyword mappings for better matching
GENRE_KEYWORDS = {
    "action": ["action", "fight", "battle", "combat", "war"],
    "romance": ["romance", "love", "romantic", "relationship", "dating"],
    "comedy": ["comedy", "funny", "humor", "hilarious", "laugh"],
    "drama": ["drama", "emotional", "feels", "sad", "tear"],
    "horror": ["horror", "scary", "terrifying", "creepy", "dark"],
    "psychological": ["psychological", "mind", "mental", "thriller", "mindbending"],
    "slice of life": ["slice of life", "daily", "everyday", "relaxing", "wholesome"],
    "fantasy": ["fantasy", "magic", "wizard", "isekai", "magical"],
    "sci-fi": ["sci-fi", "scifi", "science fiction", "future", "space", "mecha"],
    "sports": ["sports", "basketball", "soccer", "volleyball", "baseball"],
    "mystery": ["mystery", "detective", "investigation", "whodunit"],
    "supernatural": ["supernatural", "ghost", "spirit", "demon", "paranormal"],
}


def detect_genres_from_query(query: str) -> list[str]:
    """Detect genre preferences mentioned in a natural-language query.

    A genre is detected when any of its trigger keywords occurs as a
    substring of the lowercased query; results are title-cased and follow
    GENRE_KEYWORDS insertion order.
    """
    lowered = query.lower()
    detected = []

    for genre, triggers in GENRE_KEYWORDS.items():
        if any(trigger in lowered for trigger in triggers):
            detected.append(genre.title())

    return detected
|
backend/llm/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# LLM module
|
backend/llm/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (129 Bytes). View file
|
|
|
backend/llm/__pycache__/groq_client.cpython-313.pyc
ADDED
|
Binary file (6.37 kB). View file
|
|
|
backend/llm/groq_client.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Groq LLM Client for AI Recommendations"""
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
+
from config import GROQ_API_KEY, LLM_MODEL
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
from groq import Groq
|
| 11 |
+
except ImportError:
|
| 12 |
+
Groq = None
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
SYSTEM_PROMPT = """You are AniVerse AI, an expert anime and manga recommendation assistant.
|
| 16 |
+
|
| 17 |
+
## YOUR CORE MISSION
|
| 18 |
+
Provide HIGHLY RELEVANT, PRECISE recommendations. Quality over quantity. Every suggestion must directly address what the user is looking for.
|
| 19 |
+
|
| 20 |
+
## RECOMMENDATION RULES
|
| 21 |
+
1. **Match the Query Exactly**: If user asks for "dark fantasy", recommend dark fantasy - not action comedy.
|
| 22 |
+
2. **Use Context Wisely**: Reference the "Relevant Anime/Manga" data provided. These are semantically matched to the query.
|
| 23 |
+
3. **Explain Your Picks**: For EACH recommendation, give 1-2 sentences on WHY it fits the request.
|
| 24 |
+
4. **Limit Recommendations**: Suggest 2-4 titles max per response. Be selective.
|
| 25 |
+
5. **Format Clearly**: Use bold for titles, include scores and genres inline.
|
| 26 |
+
|
| 27 |
+
## PERSONALIZATION (When User Profile Available)
|
| 28 |
+
- Reference their high-rated titles: "Since you gave Attack on Titan a 9..."
|
| 29 |
+
- Avoid genres from low-rated shows
|
| 30 |
+
- Connect new suggestions to their favorites
|
| 31 |
+
|
| 32 |
+
## RESPONSE FORMAT
|
| 33 |
+
When recommending, use this structure:
|
| 34 |
+
**[Title]** (★ score/10) - [Brief reason why this matches their request]
|
| 35 |
+
|
| 36 |
+
## GUIDELINES
|
| 37 |
+
- Be enthusiastic but concise
|
| 38 |
+
- No spoilers
|
| 39 |
+
- If the context doesn't have good matches, say so honestly
|
| 40 |
+
- You can discuss plots, characters, and themes
|
| 41 |
+
- Support both anime AND manga recommendations
|
| 42 |
+
|
| 43 |
+
Context about relevant titles will be provided below."""
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class GroqClient:
    """Groq LLM client for AI-powered recommendations.

    Wraps the Groq chat-completions API with the AniVerse system prompt,
    optional retrieval context, and conversation history.
    """

    def __init__(self):
        """Create the underlying Groq client.

        Raises:
            ImportError: if the ``groq`` package is not installed.
            ValueError: if GROQ_API_KEY is not configured.
        """
        # Fail fast with actionable messages for the two setup mistakes
        # users actually hit: missing package and missing API key.
        if not Groq:
            raise ImportError("groq package not installed. Run: pip install groq")

        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not set. Add it to your .env file")

        self.client = Groq(api_key=GROQ_API_KEY)
        self.model = LLM_MODEL

    def chat(
        self,
        user_message: str,
        context: str = "",
        history: Optional[list[dict]] = None,  # fix: annotation now matches the None default
        max_tokens: int = 1024
    ) -> str:
        """Send a chat message and get a response.

        Args:
            user_message: The user's current message.
            context: Optional retrieval context, injected as a system message.
            history: Prior turns as ``{"role": ..., "content": ...}`` dicts.
            max_tokens: Upper bound on the generated completion length.

        Returns:
            The assistant's reply text.
        """
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]

        # Add context if provided
        if context:
            messages.append({
                "role": "system",
                "content": f"Here is relevant anime data from our database:\n\n{context}"
            })

        # Add conversation history
        if history:
            messages.extend(history)

        # Add current user message
        messages.append({"role": "user", "content": user_message})

        # Call Groq API
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=0.7,
        )

        return response.choices[0].message.content

    def summarize_reviews(
        self,
        reviews: list[str],
        anime_title: str
    ) -> dict:
        """Summarize multiple reviews into pros/cons.

        Args:
            reviews: Raw review texts; only the first 10 are sent to the LLM.
            anime_title: Title the reviews refer to.

        Returns:
            Dict with ``sentiment``, ``pros``, ``cons``, ``summary`` and
            ``aspects`` keys. Falls back to a neutral structure (raw response
            in ``summary``) when the LLM output is not valid JSON.
        """
        reviews_text = "\n---\n".join(reviews[:10])  # Limit to 10 reviews

        prompt = f"""Analyze these reviews for "{anime_title}" and provide:
1. Overall sentiment (positive/negative/mixed)
2. Top 3 pros (things reviewers loved)
3. Top 3 cons (things reviewers criticized)
4. A 2-3 sentence summary
5. Aspect scores (1-10) for: story, animation, characters, music, enjoyment

Reviews:
{reviews_text}

Respond in JSON format:
{{
    "sentiment": "positive|negative|mixed",
    "pros": ["pro1", "pro2", "pro3"],
    "cons": ["con1", "con2", "con3"],
    "summary": "...",
    "aspects": {{"story": 8, "animation": 9, ...}}
}}"""

        response = self.chat(prompt, max_tokens=512)
        return self._parse_summary_json(response)

    @staticmethod
    def _parse_summary_json(response: str) -> dict:
        """Parse the LLM's JSON summary, tolerating markdown code fences.

        LLMs frequently wrap JSON answers in ```json ... ``` fences even when
        told not to; strip them before parsing so valid payloads are not lost
        to the fallback path.
        """
        import json
        import re

        cleaned = response.strip()
        if cleaned.startswith("```"):
            cleaned = re.sub(r"^```[a-zA-Z]*\s*", "", cleaned)
            cleaned = re.sub(r"\s*```$", "", cleaned)

        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Best-effort fallback: keep the raw text so callers can display it.
            return {
                "sentiment": "mixed",
                "pros": [],
                "cons": [],
                "summary": response,
                "aspects": {}
            }

    def generate_recommendation_reason(
        self,
        user_query: str,
        anime_data: dict
    ) -> str:
        """Generate a personalized reason why an anime matches the user's request.

        Args:
            user_query: The original natural-language request.
            anime_data: Matched anime record; ``title``/``genres``/``score``
                keys are used when present.

        Returns:
            A 1-2 sentence explanation from the LLM.
        """
        prompt = f"""The user asked: "{user_query}"

This anime was matched:
- Title: {anime_data.get('title', 'Unknown')}
- Genres: {anime_data.get('genres', 'Unknown')}
- Score: {anime_data.get('score', 'N/A')}

In 1-2 sentences, explain why this anime matches what the user is looking for. Be specific about the connection."""

        return self.chat(prompt, max_tokens=150)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Module-level singleton: one GroqClient shared by all callers.
_client: Optional[GroqClient] = None


def get_llm_client() -> GroqClient:
    """Get or create LLM client instance"""
    global _client
    if _client is not None:
        return _client
    _client = GroqClient()
    return _client
|
backend/main.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AniVerse API - Main Entry Point"""
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
import sys
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
# Add backend to path
|
| 8 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 9 |
+
|
| 10 |
+
from routes import search, chat, anime, auth, lists, recommendations, mal_import, manga
|
| 11 |
+
|
| 12 |
+
# Create FastAPI app with auto-generated OpenAPI docs at /docs and /redoc.
app = FastAPI(
    title="AniVerse API",
    description="AI-powered anime & manga discovery platform with semantic search, personalized recommendations, and user lists",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS middleware - Allow ALL origins for cross-domain requests.
# NOTE(review): wildcard origins are permissive; acceptable for a public
# read-mostly API, but confirm before exposing authenticated endpoints.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,  # Must be False when using wildcard
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — one per feature area (see the routes package).
app.include_router(search.router)
app.include_router(chat.router)
app.include_router(anime.router)
app.include_router(auth.router)
app.include_router(lists.router)
app.include_router(recommendations.router)
app.include_router(mal_import.router)
app.include_router(manga.router)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@app.get("/")
async def root():
    """API root - health check and info"""
    # Static service descriptor: version, status, and available route prefixes.
    endpoint_map = {
        "docs": "/docs",
        "search": "/api/search",
        "chat": "/api/chat",
        "anime": "/api/anime",
        "manga": "/api/manga",
        "auth": "/api/auth",
        "lists": "/api/lists",
        "recommendations": "/api/recommendations",
    }
    return {
        "name": "AniVerse API",
        "version": "2.0.0",
        "status": "running",
        "endpoints": endpoint_map,
    }
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@app.get("/api/health")
async def health_check():
    """Health check endpoint for Docker/k8s"""
    # Liveness only — no dependency checks, so orchestrators get a fast answer.
    payload = {"status": "healthy"}
    return payload
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@app.get("/api/stats")
async def get_stats():
    """Get database statistics.

    Returns:
        Counts of anime rows in the source CSV and of indexed entries in the
        anime/manga vector stores.
    """
    # Local imports keep startup fast and avoid loading heavy deps unless hit.
    from embeddings.chroma_store import get_vector_store
    from embeddings.manga_chroma_store import get_manga_vector_store
    from config import DATASET_PATH
    import pandas as pd

    anime_store = get_vector_store()
    manga_store = get_manga_vector_store()
    # Only the row count is needed — reading a single column instead of the
    # whole CSV cuts I/O and memory on every request.
    df = pd.read_csv(DATASET_PATH, usecols=[0])

    return {
        "total_anime": len(df),
        "indexed_anime": anime_store.get_count(),
        "indexed_manga": manga_store.get_count(),
    }
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
if __name__ == "__main__":
    # Dev entry point: auto-reload server on 0.0.0.0:8000.
    # In production the Dockerfile/process manager should launch uvicorn instead.
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
|
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a055086a635d2cacb3a426111c19a9b788478cd9a55baaea33036c6cbf5b2b13
|
| 3 |
+
size 29851236
|
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fdf8dfc32fae317f2cff4ec0a5f920a6591ff14b0f8721ad6c584b713d592dd
|
| 3 |
+
size 100
|
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/index_metadata.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7f71f0b2f29d673b4c6a282b9af736e2a498d132f162c4840b3d9aeb1501c89
|
| 3 |
+
size 535698
|
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b4cc8354a23c80c0abaf4485b174ea8bd44dc51c76c5d0acc1218cc1173df6e
|
| 3 |
+
size 71244
|
backend/manga_chroma_db/466eefed-add1-4ba8-932f-06a367917727/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c442cbb882eeb83457c7747aff45504845f000234c962b652e33dc5c779cb82
|
| 3 |
+
size 157876
|
backend/manga_chroma_db/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69e98a1267fe754f0ebf174c81cc5fe9823375d1fc66d7f78d462550a9ec5d68
|
| 3 |
+
size 29241344
|