favoredone committed on
Commit
dab3de5
Β·
verified Β·
1 Parent(s): 93104ee

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. app.py +748 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as a non-root user (UID 1000), as recommended for Hugging Face Spaces.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the code so Docker layer caching
# survives source-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# NOTE(review): this runs BEFORE the application code is copied, so it only
# affects the still-empty /app directory; 777 is also overly permissive.
# Confirm whether this step is still needed at all.
RUN chmod -R 777 /app


COPY --chown=user . /app
# 7860 is the port Hugging Face Spaces expects the app to listen on.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import asyncio
5
+ import aiohttp
6
+ from typing import Dict, List, Set, Optional
7
+ from urllib.parse import quote, urljoin
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ import huggingface_hub
11
+ from datasets import Dataset
12
+
13
+ from fastapi import FastAPI, BackgroundTasks, HTTPException, status
14
+ from fastapi.responses import JSONResponse
15
+ from pydantic import BaseModel, Field
16
+ from contextlib import asynccontextmanager
17
+ import uvicorn
18
+
19
# Path for storing tracking data (one JSON file per course).
TRACKS_DIR = Path("tracks_data")
TRACKS_DIR.mkdir(exist_ok=True)

# Hugging Face configuration, read from the environment at import time.
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET_ID = os.getenv("HF_DATASET_ID", "fred808/data")

# Fail fast at import time: uploads cannot work without a token.
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is required")
29
+
30
def get_track_file_path(course: str) -> Path:
    """Return the JSON file path used to persist tracking results for *course*."""
    # Percent-encode every character so arbitrary course names are filesystem-safe.
    encoded = quote(course, safe='')
    filename = f"{encoded}_tracks.json"
    return TRACKS_DIR / filename
34
+
35
def save_tracks_to_file(course: str, tracks: List[Dict]) -> None:
    """Persist tracking results for *course* as pretty-printed JSON (best effort).

    Errors are logged and swallowed so a failed save never aborts processing.
    """
    try:
        target = get_track_file_path(course)
        with open(target, 'w', encoding='utf-8') as handle:
            json.dump(tracks, handle, indent=2, ensure_ascii=False)
        print(f"βœ“ Saved {len(tracks)} tracks for {course}")
    except Exception as e:
        print(f"Error saving tracks for {course}: {e}")
44
+
45
def load_tracks_from_file(course: str) -> List[Dict]:
    """Load previously saved tracking results for *course*.

    Returns an empty list when no file exists or the file cannot be read.
    """
    try:
        source = get_track_file_path(course)
        if source.exists():
            with open(source, 'r', encoding='utf-8') as handle:
                loaded = json.load(handle)
            print(f"βœ“ Loaded {len(loaded)} existing tracks for {course}")
            return loaded
    except Exception as e:
        print(f"Error loading tracks for {course}: {e}")
    return []
57
+
58
# Configuration
# Server that hosts the course folders and frame images.
SOURCE_SERVER = "https://fred808-vs2.hf.space"
# Worker endpoints that perform the actual cursor detection.
CURSOR_SERVERS = [
    "https://elias2211-elias2211-4zhhex.hf.space/track_cursor_url",
    "https://elias2211-cur1.hf.space/track_cursor_url",

]

# This coordinator now sends image URLs to cursor-tracker servers.
# The servers listed above expose an endpoint (originally /analyze) —
# we will replace the trailing '/analyze' with '/track_cursor_url' when calling.
69
+
70
# FastAPI Models
class CourseInfo(BaseModel):
    """One course entry as returned by the source server's /courses endpoint."""
    # NOTE(review): currently unreferenced in this file — kept for API schema use.
    course_folder: str
73
+
74
class ImageInfo(BaseModel):
    """One image entry from the source server's image listing."""
    filename: str
76
+
77
class TrackRequest(BaseModel):
    """Request body shape for a single cursor-tracking call."""
    image_url: str
    # Minimum template-match confidence; 0.8 mirrors the tracker's default.
    threshold: float = 0.8
80
+
81
class TrackResponse(BaseModel):
    """Shape of a cursor-tracker server's JSON reply.

    All fields except `success` are optional: on failure only `error` is set.
    """
    success: bool
    cursor_active: Optional[bool] = None
    x: Optional[int] = None
    y: Optional[int] = None
    confidence: Optional[float] = None
    template: Optional[str] = None
    error: Optional[str] = None
89
+
90
class ServerStatus(BaseModel):
    """Snapshot of one CursorServer's bookkeeping counters (see /servers/status)."""
    url: str
    model: str
    busy: bool
    total_processed: int
    total_time: float
    fps: float
97
+
98
class ProcessingStatus(BaseModel):
    """Per-course progress summary.

    NOTE(review): /processing/status returns plain dicts rather than this
    model — kept as documentation of the intended shape.
    """
    course: str
    total_images: int
    processed_images: int
    progress_percent: float
    status: str
104
+
105
class StartProcessingRequest(BaseModel):
    """Options accepted by POST /processing/start."""
    courses: Optional[List[str]] = None  # If None, process all courses
    continuous: bool = True  # Default to continuous like original
108
+
109
# Lifespan context manager for startup/shutdown events
# NOTE(review): an identical second definition of `lifespan` appears near the
# bottom of this file; since `app` is constructed immediately after THIS one,
# this is the definition actually used and the later copy is dead code.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for startup/shutdown events.

    Startup: builds the CursorServer list and (optionally) launches the
    background processing loop.  Shutdown: cancels that loop and waits for it.
    """
    # Startup
    initialize_servers()
    print("Cursor Tracking Coordinator API started")
    print(f"Source server: {SOURCE_SERVER}")
    print(f"Cursor servers: {len(CURSOR_SERVERS)}")
    print(f"Hugging Face dataset: {HF_DATASET_ID}")
    print(f"HF Token: {'βœ… Set' if HF_TOKEN else '❌ Missing'}")

    # Start processing automatically
    if auto_start_processing:
        print("Auto-starting processing loop...")
        # `global` applies to the whole function even though declared here.
        global is_processing, current_processing_task
        is_processing = True
        current_processing_task = asyncio.create_task(processing_loop())

    yield  # Server is running

    # Shutdown: signal the loop to stop, then cancel and await the task.
    if current_processing_task:
        is_processing = False
        current_processing_task.cancel()
        try:
            await current_processing_task
        except asyncio.CancelledError:
            pass
138
+
139
# FastAPI App with lifespan (startup/shutdown handled by `lifespan` above).
app = FastAPI(
    title="Cursor Tracking Coordinator API",
    description="Distributed cursor-tracking coordinator",
    version="1.0.0",
    lifespan=lifespan
)
146
+
147
# Global state (module-level; mutated by the endpoints and the processing loop).
processed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
course_tracks: Dict[str, List[Dict]] = {}  # {course: [{image, tracking, metadata}]}
failed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
servers = []  # list of CursorServer, populated by initialize_servers()
is_processing = False  # True while the background loop should keep running
current_processing_task = None  # asyncio.Task running processing_loop(), if any
auto_start_processing = True  # Set to False if you don't want auto-start
155
+
156
class CursorServer:
    """Bookkeeping wrapper for a single remote cursor-tracking worker."""

    def __init__(self, url):
        self.url = url
        self.busy = False          # True while a request is in flight
        self.model = "unknown"     # filled in from the server's health info
        self.total_processed = 0   # successfully tracked images
        self.total_time = 0        # cumulative request time in seconds

    @property
    def fps(self):
        """Average images per second; 0 until any time has been recorded."""
        if self.total_time > 0:
            return self.total_processed / self.total_time
        return 0
167
+
168
# Initialize servers
def initialize_servers():
    """(Re)build the global `servers` list from the configured CURSOR_SERVERS."""
    global servers
    fresh = []
    for endpoint in CURSOR_SERVERS:
        fresh.append(CursorServer(endpoint))
    servers = fresh
172
+
173
# API Routes
@app.get("/")
async def root():
    """Service banner plus the current processing flags."""
    return {
        # Fix: this previously said "Caption Coordinator API" — a leftover from
        # the caption coordinator this service was derived from; keep it
        # consistent with the FastAPI title.
        "message": "Cursor Tracking Coordinator API",
        "status": "running",
        "auto_processing": auto_start_processing,
        "is_processing": is_processing
    }
182
+
183
@app.get("/health")
async def health():
    """Liveness probe: reports idle-server count and the processing flags."""
    idle_count = sum(1 for s in servers if not s.busy)
    return {
        "status": "healthy",
        "servers_available": idle_count,
        "total_servers": len(servers),
        "is_processing": is_processing,
        "auto_processing": auto_start_processing
    }
192
+
193
@app.get("/courses")
async def get_courses():
    """List course folder names available on the source server."""
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{SOURCE_SERVER}/courses") as resp:
                payload = await resp.json()
        if isinstance(payload, dict) and 'courses' in payload:
            return [entry['course_folder'] for entry in payload['courses'] if isinstance(entry, dict)]
        return []
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching courses: {e}")
205
+
206
@app.get("/courses/{course}/images")
async def get_course_images(course: str):
    """List the frame images for one course."""
    try:
        # Frames live in a "<course>_frames" folder on the source server.
        folder = course if course.endswith("_frames") else f"{course}_frames"
        listing_url = f"{SOURCE_SERVER}/images/{quote(folder)}"
        async with aiohttp.ClientSession() as session:
            async with session.get(listing_url) as resp:
                payload = await resp.json()
        if isinstance(payload, dict) and 'images' in payload:
            return payload['images']
        return []
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching images: {e}")
220
+
221
@app.get("/servers/status")
async def get_servers_status():
    """Snapshot of every cursor server's bookkeeping counters."""
    return [
        ServerStatus(
            url=s.url,
            model=s.model,
            busy=s.busy,
            total_processed=s.total_processed,
            total_time=s.total_time,
            fps=s.fps,
        )
        for s in servers
    ]
235
+
236
@app.get("/processing/status")
async def get_processing_status():
    """Get current processing status"""
    status_info = {}
    for course in processed_images:
        # NOTE(review): `total` here counts *successfully processed* images,
        # not the images in the course (that would require re-fetching the
        # listing), so `progress_percent` is misleading — confirm intent.
        total = len(processed_images[course])
        processed = len(course_tracks.get(course, []))
        failed = len(failed_images.get(course, set()))
        status_info[course] = {
            "course": course,
            "total_images": total,
            "processed_images": processed,
            "failed_images": failed,
            "progress_percent": (processed / total * 100) if total > 0 else 0,
            "status": "completed" if processed + failed >= total else "processing"
        }
    return status_info
253
+
254
@app.post("/processing/start")
async def start_processing(request: StartProcessingRequest = StartProcessingRequest()):
    """Launch the background processing loop (400 if already running)."""
    global is_processing, current_processing_task

    if is_processing:
        raise HTTPException(status_code=400, detail="Processing is already running")

    is_processing = True
    loop_coro = processing_loop(request.courses, request.continuous)
    current_processing_task = asyncio.create_task(loop_coro)

    return {
        "message": "Processing started",
        "continuous": request.continuous,
        "specific_courses": request.courses
    }
272
+
273
@app.post("/processing/stop")
async def stop_processing():
    """Stop the background processing loop and cancel its task (400 if idle)."""
    global is_processing, current_processing_task

    if not is_processing:
        raise HTTPException(status_code=400, detail="Processing is not running")

    # Clear the flag first so the loop exits cleanly even before the cancel lands.
    is_processing = False
    task = current_processing_task
    if task:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        current_processing_task = None

    return {"message": "Processing stopped"}
291
+
292
@app.get("/tracks/{course}")
async def get_tracks(course: str):
    """Return all persisted tracking results for a specific course."""
    stored = load_tracks_from_file(course)
    return {
        "course": course,
        "total_tracks": len(stored),
        "tracks": stored,
    }
301
+
302
@app.delete("/tracks/{course}")
async def delete_tracks(course: str):
    """Delete tracking results for a specific course.

    Removes the on-disk JSON file and clears the in-memory state.  Responds
    404 when no track file exists, 500 on unexpected filesystem errors.
    """
    try:
        file_path = get_track_file_path(course)
        if file_path.exists():
            file_path.unlink()
            # Drop any in-memory state so a later run starts fresh.
            processed_images.pop(course, None)
            course_tracks.pop(course, None)
            failed_images.pop(course, None)
            return {"message": f"Tracks for {course} deleted"}
        else:
            raise HTTPException(status_code=404, detail=f"No tracks found for {course}")
    except HTTPException:
        # Bug fix: the broad `except Exception` below used to swallow the 404
        # raised above and re-report it as a 500.  Re-raise it untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error deleting tracks: {e}")
320
+
321
# Core processing functions
async def fetch_courses() -> List[str]:
    """Fetch available course folder names from the source server.

    Unlike the /courses endpoint, errors propagate to the caller.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(f"{SOURCE_SERVER}/courses") as resp:
            payload = await resp.json()
    if isinstance(payload, dict) and 'courses' in payload:
        return [entry['course_folder'] for entry in payload['courses'] if isinstance(entry, dict)]
    return []
330
+
331
async def fetch_course_images(course: str) -> List[Dict]:
    """Fetch the image listing for *course* from the source server."""
    # The source server stores frames under "<course>_frames".
    folder = course if course.endswith("_frames") else f"{course}_frames"
    listing_url = f"{SOURCE_SERVER}/images/{quote(folder)}"
    async with aiohttp.ClientSession() as session:
        async with session.get(listing_url) as resp:
            payload = await resp.json()
    if isinstance(payload, dict) and 'images' in payload:
        return payload['images']
    return []
341
+
342
async def get_track(server: str, image_url: str, threshold: float = 0.8) -> Optional[Dict]:
    """POST an image URL to a tracker's /track_cursor_url endpoint.

    Returns the tracker's JSON reply, or None when the request fails.
    """
    try:
        # Legacy config entries may still point at '/analyze'; rewrite them.
        endpoint = server
        if endpoint.endswith('/analyze'):
            endpoint = endpoint.rsplit('/analyze', 1)[0] + '/track_cursor_url'

        form = {
            'image_url': image_url,
            'threshold': str(threshold),
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(endpoint, data=form, timeout=30) as resp:
                # Expect JSON with keys similar to TrackResponse
                return await resp.json()
    except Exception as e:
        print(f"Error contacting tracker {server}: {e}")
        return None
362
+
363
async def get_server_info():
    """Query each cursor server's /health endpoint.

    Servers that fail the check are logged and omitted from the result.
    """
    collected = []
    async with aiohttp.ClientSession() as session:
        for endpoint in CURSOR_SERVERS:
            try:
                if endpoint.endswith('/analyze'):
                    health_url = endpoint.rsplit('/analyze', 1)[0] + '/health'
                else:
                    health_url = endpoint.rstrip('/') + '/health'
                async with session.get(health_url, timeout=10) as resp:
                    payload = await resp.json()
                collected.append({'url': endpoint, 'info': payload})
            except Exception as e:
                print(f"Couldn't get health info from {endpoint}: {e}")
    return collected
379
+
380
async def process_image(server: CursorServer, course: str, image: Dict) -> Optional[Dict]:
    """Process single image through one cursor server and return tracking metadata.

    Marks *server* busy for the duration of the call.  Returns None when the
    server was already busy, the tracker reported an error, or the call failed.
    """
    if server.busy:
        return None

    server.busy = True
    start_time = time.time()

    try:
        # Structure URL correctly: /images/COURSE_NAME_frames/IMAGE.png
        course_frames = f"{course}_frames" if not course.endswith("_frames") else course
        image_url = urljoin(SOURCE_SERVER, f"/images/{quote(course_frames)}/{quote(image['filename'])}")
        result = await get_track(server.url, image_url)

        # Time spent counts toward the server's fps stats even on failure.
        processing_time = time.time() - start_time
        server.total_time += processing_time

        # Expect result to contain cursor tracking fields; `cursor_active` may
        # legitimately be False, so only its *absence* counts as failure.
        if result and result.get('cursor_active') is not None:
            server.total_processed += 1
            metadata = {
                "image": image['filename'],
                "tracking": {
                    "cursor_active": bool(result.get('cursor_active', False)),
                    "x": result.get('x'),
                    "y": result.get('y'),
                    "confidence": float(result.get('confidence')) if result.get('confidence') is not None else None,
                    "template": result.get('template')
                },
                "server": server.url,
                "processing_time": processing_time,
                "timestamp": datetime.now().isoformat()
            }
            print(f"Server {server.url} tracked {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
            return metadata
        else:
            error_msg = result.get('error', 'Unknown error') if isinstance(result, dict) else 'No response'
            print(f"Server {server.url} failed for {image['filename']}: {error_msg}")
            return None

    except asyncio.TimeoutError:
        print(f"Server {server.url} timeout for {image['filename']}")
        return None
    except Exception as e:
        print(f"Error processing {image['filename']} on {server.url}: {e}")
        return None

    finally:
        # Always release the server, whatever happened above.
        server.busy = False
429
+
430
async def upload_to_huggingface(course: str, metadata_list: List[Dict]):
    """Push the tracking rows for *course* to the configured HF dataset.

    Returns True on success, False on any error (logged, never raised).
    """
    try:
        print(f"πŸ“€ Uploading {len(metadata_list)} tracks for {course} to Hugging Face...")

        # Build the columnar payload the `datasets` library expects.
        column_names = [
            "course", "image_filename", "cursor_active", "x", "y",
            "confidence", "template", "processing_server",
            "processing_time", "timestamp",
        ]
        dataset_data = {name: [] for name in column_names}

        for metadata in metadata_list:
            tracking = metadata.get('tracking', {})
            dataset_data["course"].append(course)
            dataset_data["image_filename"].append(metadata.get("image"))
            dataset_data["cursor_active"].append(bool(tracking.get("cursor_active", False)))
            dataset_data["x"].append(tracking.get("x"))
            dataset_data["y"].append(tracking.get("y"))
            dataset_data["confidence"].append(tracking.get("confidence"))
            dataset_data["template"].append(tracking.get("template"))
            dataset_data["processing_server"].append(metadata.get("server"))
            dataset_data["processing_time"].append(metadata.get("processing_time"))
            dataset_data["timestamp"].append(metadata.get("timestamp"))

        dataset = Dataset.from_dict(dataset_data)

        # Authenticate, then push this course as its own config under "train".
        huggingface_hub.login(token=HF_TOKEN)
        dataset.push_to_hub(
            HF_DATASET_ID,
            config_name=course.replace("/", "_").replace(" ", "_"),
            split="train",
            commit_message=f"Add tracks for course {course} - {len(metadata_list)} images"
        )

        print(f"βœ… Successfully uploaded {len(metadata_list)} tracks for {course} to {HF_DATASET_ID}")
        return True

    except Exception as e:
        print(f"❌ Error uploading to Hugging Face: {e}")
        return False
482
+
483
async def process_course(course: str, servers: List[CursorServer]):
    """Process all images in a course using available servers with retry logic.

    Results accumulate in `course_tracks[course]`, are persisted to disk after
    each batch, and uploaded to Hugging Face once the course is complete.
    Fixes: progress messages previously printed the literal "(unknown)"
    instead of the image filename; the unused `batch_size` local was removed.
    """
    # Initialize per-course state, restoring any previously saved progress.
    if course not in processed_images:
        processed_images[course] = set()
    if course not in course_tracks:
        course_tracks[course] = load_tracks_from_file(course)
        # Update processed images set from loaded tracks
        for cap in course_tracks[course]:
            processed_images[course].add(cap['image'])
    if course not in failed_images:
        failed_images[course] = set()

    # Get list of images
    images = await fetch_course_images(course)
    if not images:
        print(f"No images found for course {course}")
        return

    print(f"\nProcessing {len(images)} images for course {course}")

    # Track images that need processing with retry count (5 retries)
    pending_images = {}
    for img in images:
        filename = img['filename']
        if filename not in processed_images[course] and filename not in failed_images[course]:
            pending_images[filename] = {'image': img, 'retries': 0, 'max_retries': 5}

    if not pending_images:
        print(f"All images already processed or failed for course {course}")
        print(f"- Processed: {len(processed_images[course])}, Failed: {len(failed_images[course])}")

        # If course is completed, upload to Hugging Face
        if len(processed_images[course]) + len(failed_images[course]) >= len(images):
            if course_tracks[course]:
                print(f"πŸ“€ Course {course} completed, uploading to Hugging Face...")
                await upload_to_huggingface(course, course_tracks[course])
        return

    print(f"Images to process: {len(pending_images)} (already processed: {len(processed_images[course])}, failed: {len(failed_images[course])})")

    processed_in_this_run = 0

    while pending_images and is_processing:
        # Create tasks for each available server
        tasks = []
        assigned_images = []

        for server in servers:
            if not server.busy and pending_images:
                # Get the next pending image
                filename, img_data = next(iter(pending_images.items()))
                img = img_data['image']

                # Assign this image to the server
                tasks.append(process_image(server, course, img))
                assigned_images.append((filename, img, img_data['retries']))
                # Remove from pending temporarily while it's being processed
                del pending_images[filename]

        if not tasks:
            # If no servers available, wait a bit
            await asyncio.sleep(0.1)
            continue

        # Process images in parallel across servers
        results = await asyncio.gather(*tasks)

        # Handle results and retry logic
        has_new_results = False
        for (filename, img, current_retries), result in zip(assigned_images, results):
            if result:
                # Success - image was processed
                processed_images[course].add(filename)
                course_tracks[course].append(result)
                has_new_results = True
                processed_in_this_run += 1
                # Fix: previously printed the literal "(unknown)" here.
                print(f"βœ“ Successfully processed {filename}")
            else:
                # Failure - check if we should retry
                if current_retries < 5:  # max_retries
                    # Put back in pending for retry with incremented retry count
                    pending_images[filename] = {
                        'image': img,
                        'retries': current_retries + 1,
                        'max_retries': 5
                    }
                    print(f"↻ Retry {current_retries + 1}/5 for {filename}")
                else:
                    # Max retries exceeded, mark as failed
                    failed_images[course].add(filename)
                    print(f"βœ— Failed to process {filename} after 5 retries")

        # Save progress after each batch with new results
        if has_new_results:
            save_tracks_to_file(course, course_tracks[course])

        # Show progress
        total = len(images)
        done = len(processed_images[course])
        failed_count = len(failed_images[course])
        pending_count = len(pending_images)
        progress_percent = (done / total * 100) if total > 0 else 0

        print(f"\rProgress: {done}/{total} ({progress_percent:.1f}%) - {pending_count} pending, {failed_count} failed, {processed_in_this_run} new", end="", flush=True)

        # Small delay to prevent overwhelming the servers
        await asyncio.sleep(0.5)

    # Final status for this course
    total = len(images)
    done = len(processed_images[course])
    failed_count = len(failed_images[course])

    if done + failed_count >= total:
        if failed_count > 0:
            print(f"\nβœ“ Course {course} completed with {failed_count} failed images")
        else:
            print(f"\nβœ“ Course {course} fully completed")

        # Upload to Hugging Face when course is completed
        if course_tracks[course]:
            print(f"πŸ“€ Uploading {len(course_tracks[course])} tracks to Hugging Face...")
            success = await upload_to_huggingface(course, course_tracks[course])
            if success:
                print(f"βœ… Successfully uploaded {course} to Hugging Face")
            else:
                print(f"❌ Failed to upload {course} to Hugging Face")
    else:
        print(f"\n→ Course {course} partially completed: {done}/{total} processed, {failed_count} failed")
614
+
615
async def processing_loop(specific_courses: Optional[List[str]] = None, continuous: bool = True):
    """Main processing loop with proper error handling.

    Health-checks the cursor servers, then repeatedly fetches courses and
    processes each one until `is_processing` is cleared (or once, when
    *continuous* is False).
    """
    global is_processing

    # Health-check the cursor servers and record their advertised model names.
    server_info = await get_server_info()
    print("\nCursor servers (health check):")
    # Bug fix: health results were zip()-ed positionally against `servers`,
    # but get_server_info() omits servers whose health check failed, so one
    # failure misaligned the model info of every following server and silently
    # dropped the last one(s).  Match responses to servers by URL instead.
    info_by_url = {entry['url']: entry.get('info') for entry in server_info if isinstance(entry, dict)}
    available_servers = []
    for server in servers:
        if server.url not in info_by_url:
            continue  # no health response — leave this server out
        info = info_by_url[server.url]
        server.model = info.get('model_choice', 'cursor-tracker') if isinstance(info, dict) else 'cursor-tracker'
        available_servers.append(server)
        print(f"- {server.url}: {server.model}")

    if not available_servers:
        print(f"\nError: No available cursor servers!")
        is_processing = False
        return

    processing_servers = available_servers
    print(f"\nUsing {len(processing_servers)} cursor servers")

    # Check for existing track files and report
    existing_tracks = list(TRACKS_DIR.glob("*_tracks.json"))
    if existing_tracks:
        print("\nFound existing track files:")
        for cap_file in existing_tracks:
            course = cap_file.stem.replace("_tracks", "")
            try:
                with open(cap_file, 'r', encoding='utf-8') as f:
                    tracks = json.load(f)
                print(f"- {course}: {len(tracks)} tracks")
            except Exception as e:
                print(f"- Error reading {cap_file.name}: {e}")
        print()

    start_time = time.time()
    iteration = 0

    while is_processing:
        try:
            iteration += 1
            print(f"\n{'='*50}")
            print(f"Processing Iteration {iteration}")
            print(f"{'='*50}")

            # Get available courses
            if specific_courses:
                courses = specific_courses
                print(f"Processing specific courses: {courses}")
            else:
                courses = await fetch_courses()
                print(f"Found {len(courses)} courses")

            if not courses:
                print("No courses found, waiting...")
                if not continuous:
                    break
                await asyncio.sleep(10)
                continue

            # Process each course with all available servers
            for course in courses:
                if not is_processing:
                    break

                print(f"\n--- Processing course: {course} ---")
                await process_course(course, processing_servers)

            # Show server stats
            print("\nServer Stats:")
            total_processed = sum(s.total_processed for s in processing_servers)
            elapsed = time.time() - start_time
            if elapsed > 0:
                print(f"Total images processed: {total_processed}")
                print(f"Overall speed: {total_processed/elapsed:.2f} fps")
                for s in processing_servers:
                    print(f"- {s.url}: {s.total_processed} images, {s.fps:.2f} fps")
                print()

            if not continuous:
                print("One-time processing completed")
                break

            # Wait before next check
            print("Waiting for new courses...")
            await asyncio.sleep(5)

        except asyncio.CancelledError:
            print("Processing cancelled")
            break
        except Exception as e:
            print(f"Error in processing loop: {str(e)}")
            import traceback
            traceback.print_exc()
            await asyncio.sleep(10)

    is_processing = False
    print("Processing loop stopped")
714
+
715
# Lifespan context manager for startup/shutdown events
# NOTE(review): this is an exact duplicate of the `lifespan` defined earlier
# in the file.  `app = FastAPI(lifespan=lifespan)` was evaluated with the
# first definition, so this redefinition is dead code and can be deleted.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for startup/shutdown events"""
    # Startup
    initialize_servers()
    print("Cursor Tracking Coordinator API started")
    print(f"Source server: {SOURCE_SERVER}")
    print(f"Cursor servers: {len(CURSOR_SERVERS)}")
    print(f"Hugging Face dataset: {HF_DATASET_ID}")
    print(f"HF Token: {'βœ… Set' if HF_TOKEN else '❌ Missing'}")

    # Start processing automatically
    if auto_start_processing:
        print("Auto-starting processing loop...")
        global is_processing, current_processing_task
        is_processing = True
        current_processing_task = asyncio.create_task(processing_loop())

    yield  # Server is running

    # Shutdown
    if current_processing_task:
        is_processing = False
        current_processing_task.cancel()
        try:
            await current_processing_task
        except asyncio.CancelledError:
            pass
744
+
745
+
746
if __name__ == "__main__":
    # Run with module-style import string for reload support
    # NOTE(review): this local entry point listens on port 8000 while the
    # Dockerfile CMD uses 7860 — presumably dev vs. Spaces deployment; confirm.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.24.0
3
+ aiofiles==23.2.1
4
+ python-multipart==0.0.6
5
+ huggingface-hub==0.18.0
6
+ aiohttp
7
+ datasets