final space code ready
Browse files
- app/backend/hub.py +110 -24
- app/backend/main.py +21 -0
- app/backend/storage.py +0 -69
app/backend/hub.py
CHANGED
@@ -1,13 +1,93 @@
+# # /app/backend/hub.py
+# import os
+# import logging
+# from typing import Optional, Dict
+# from pathlib import Path
+#
+# from huggingface_hub import HfApi, upload_folder, Repository, create_repo
+#
+# logger = logging.getLogger(__name__)
+# logger.addHandler(logging.NullHandler())
+#
+# def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
+#     """Initializes HfApi using a provided token or the environment secret."""
+#     token_to_use = token or os.getenv("HF_TOKEN")
+#     if token_to_use:
+#         return HfApi(token=token_to_use)
+#     return None
+#
+# def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
+#     """Fetches user info from the Hub (returns None if unauthenticated)."""
+#     api = get_hf_api(token)
+#     if api:
+#         try:
+#             return api.whoami()
+#         except Exception as e:
+#             # print(f"Failed to authenticate with Hugging Face Hub: {e}")
+#             logger.warning("Failed to authenticate with Hugging Face Hub: %s", e)
+#             return None
+#     return None
+#
+#
+# def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: Optional[str] = None):
+#     """
+#     Push the contents of folder_path to the Hugging Face datasets repo `namespace/repo_name`.
+#     Uses token param if provided, otherwise falls back to HF_TOKEN from environment/.env.
+#     """
+#     api = get_hf_api(token)
+#     if not api:
+#         raise ConnectionError("Hugging Face token not found. Provide token or set HF_TOKEN in environment or .env file.")
+#
+#     repo_id = f"{namespace}/{repo_name}"
+#
+#     # create repo if not exists
+#     try:
+#         api.create_repo(name=repo_name, token=token or os.getenv("HF_TOKEN"), repo_type="dataset", private=private, namespace=namespace)
+#         logger.info(f"Created repo {repo_id} (or it already exists)")
+#     except Exception as e:
+#         logger.info(f"Repo create warning (may already exist): {e}")
+#
+#     # upload entire folder (recursively) using upload_folder helper
+#     try:
+#         upload_folder(
+#             folder_path=folder_path,
+#             path_in_repo="",
+#             repo_id=repo_id,
+#             token=token or os.getenv("HF_TOKEN"),
+#             repo_type="dataset",
+#             commit_message=commit_message,
+#         )
+#         logger.info(f"Uploaded folder to {repo_id} from {folder_path}")
+#     except Exception as e:
+#         logger.exception(f"upload_folder failed for {folder_path} -> {repo_id}: {e}")
+#         raise
+
 # /app/backend/hub.py
 import os
 import logging
 from typing import Optional, Dict
 from pathlib import Path
 
-from huggingface_hub import HfApi, upload_folder, Repository, create_repo
+# third-party
+from huggingface_hub import HfApi, upload_folder, create_repo
+
+# dotenv helper to load .env when running locally
+from dotenv import load_dotenv
 
+# explicit logger for this module
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+
+# Attempt to load .env from the project root (two levels up: app/backend -> project root)
+try:
+    env_path = Path(__file__).resolve().parents[2] / ".env"
+    if env_path.exists():
+        load_dotenv(dotenv_path=env_path)
+        logger.info(f"Loaded .env from {env_path}")
+    else:
+        # Try default load (cwd) as a fallback
+        load_dotenv()
+except Exception as e:
+    logger.debug(f"Could not load .env automatically: {e}")
 
 def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
     """Initializes HfApi using a provided token or the environment secret."""
@@ -16,48 +96,54 @@ def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
         return HfApi(token=token_to_use)
     return None
 
+# def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
+#     """Fetches user info from the Hub."""
+#     api = get_hf_api(token)
+#     if api:
+#         try:
+#             return api.whoami()
+#         except Exception as e:
+#             logger.warning(f"Failed to authenticate with Hugging Face Hub: {e}")
+#             return None
+#     return None
+
 def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
-    """Fetches user info from the Hub (returns None if unauthenticated)."""
     api = get_hf_api(token)
-    if api:
-        try:
-            return api.whoami()
-        except Exception as e:
-            # print(f"Failed to authenticate with Hugging Face Hub: {e}")
-            logger.warning("Failed to authenticate with Hugging Face Hub: %s", e)
-            return None
-    return None
-
+    if not api:
+        logger.warning("Hugging Face authentication not available. Please check HF_TOKEN.")
+        return None
+    try:
+        return api.whoami()
+    except Exception as e:
+        logger.warning(f"Failed to authenticate with Hugging Face Hub: {e}")
+        return None
 
-def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: Optional[str] = None):
+def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: str):
     """
-    Push the contents of folder_path to the Hugging Face datasets repo `namespace/repo_name`.
-    Uses token param if provided, otherwise falls back to HF_TOKEN from environment/.env.
+    Push the folder at folder_path to the Hugging Face datasets repo `namespace/repo_name`.
+    This uploads everything in folder_path (including README.md, manifest.csv, images/, annotations/).
     """
-    api = get_hf_api(token)
-    if not api:
-        raise ConnectionError("Hugging Face token not found. Provide token or set HF_TOKEN in environment or .env file.")
-
+    api = HfApi(token=token)
     repo_id = f"{namespace}/{repo_name}"
 
     # create repo if not exists
     try:
-        api.create_repo(name=repo_name, token=token or os.getenv("HF_TOKEN"), repo_type="dataset", private=private, namespace=namespace)
-        logger.info(f"Created repo {repo_id} (or it already exists)")
+        api.create_repo(name=repo_name, token=token, repo_type="dataset", private=private, namespace=namespace)
+        logger.info(f"Created repo {repo_id}")
     except Exception as e:
         logger.info(f"Repo create warning (may already exist): {e}")
 
-    # upload entire folder (recursively) using upload_folder helper
+    # upload entire folder (recursively) using upload_folder
    try:
         upload_folder(
             folder_path=folder_path,
             path_in_repo="",
             repo_id=repo_id,
-            token=token or os.getenv("HF_TOKEN"),
+            token=token,
             repo_type="dataset",
             commit_message=commit_message,
         )
         logger.info(f"Uploaded folder to {repo_id} from {folder_path}")
     except Exception as e:
         logger.exception(f"upload_folder failed for {folder_path} -> {repo_id}: {e}")
-        raise
+        raise
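For context, a minimal usage sketch of the reworked hub helpers, assuming HF_TOKEN is available via the environment or .env; the import path, folder path, repo name and namespace below are illustrative placeholders, not part of this commit:

    import os
    from app.backend.hub import get_user_info, push_dataset_to_hub  # import path assumes running from the repo root

    # Resolve the authenticated account first (falls back to HF_TOKEN from env/.env).
    info = get_user_info()
    if info:
        push_dataset_to_hub(
            folder_path="outputs/tulasi-curated-dataset",  # placeholder: prepared dataset folder
            repo_name="tulasi-curated-dataset",            # placeholder repo name
            namespace=info["name"],                        # push under the authenticated account
            private=True,
            commit_message="Initial curated dataset upload",
            token=os.environ["HF_TOKEN"],                  # new signature takes an explicit token
        )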
app/backend/main.py
CHANGED
@@ -17,6 +17,27 @@ from fastapi.middleware.cors import CORSMiddleware
 from starlette.staticfiles import StaticFiles
 from fastapi.exceptions import RequestValidationError
 
+
+from pathlib import Path
+from dotenv import load_dotenv
+import os
+import logging
+
+# Attempt to load .env from repo root (two levels up: app/backend -> project root)
+env_path = Path(__file__).resolve().parents[2] / ".env"
+if env_path.exists():
+    load_dotenv(dotenv_path=env_path)
+    logging.getLogger(__name__).info(f"Loaded .env from {env_path}; HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
+else:
+    # fallback to default load (cwd)
+    load_dotenv()
+    logging.getLogger(__name__).info(f"Loaded .env from cwd; HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.info(f"ENV HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
+
+
 from .schemas import *
 from .storage import (
     create_new_session, get_session_path, save_session_config,
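As a quick sanity check of the loader above: from app/backend/main.py, Path(__file__).resolve().parents[2] is the repository root, so the .env carrying HF_TOKEN belongs there. A small illustrative sketch (the relative path below is an assumption about the checkout layout):

    from pathlib import Path

    # parents[0] = app/backend, parents[1] = app, parents[2] = repo root
    print(Path("app/backend/main.py").resolve().parents[2] / ".env")
    # expected: <repo root>/.env, where HF_TOKEN=... should live for local runs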
app/backend/storage.py
CHANGED
@@ -79,75 +79,6 @@ def extract_zip(zip_file_path: Path, extract_to: Path) -> List[str]:
         image_filenames.append(sanitized_id)
     return image_filenames
 
-# def move_labeled_file(session_id: str, image_id: str, variety: str, disease: str) -> Tuple[Path, Path]:
-#     """Moves a labeled image to its final structured directory and returns the new paths.
-#
-#     NOTE: this function updates metadata.processed_path to a RELATIVE path inside the final dataset
-#     (images/<variety>/<disease>/<image_id>) and writes a per-image annotation JSON that excludes
-#     original_path, mask_path and status (as requested).
-#     """
-#     config = load_session_config(session_id)
-#     if image_id not in config.image_metadata:
-#         raise FileNotFoundError(f"Image id {image_id} not found in session config")
-#     metadata = config.image_metadata[image_id]
-#
-#     final_dataset_slug = "tulasi-curated-dataset"
-#     final_base_path = OUTPUT_DIR / final_dataset_slug
-#     final_image_dir = final_base_path / "images" / variety / disease
-#     final_mask_dir = final_base_path / "masks"
-#     final_annotations_dir = final_base_path / "annotations"
-#
-#     final_image_dir.mkdir(parents=True, exist_ok=True)
-#     final_annotations_dir.mkdir(parents=True, exist_ok=True)
-#
-#     # Prefer processed image if present, otherwise fall back to original
-#     source_path_str = metadata.processed_path or metadata.original_path
-#     source_path = Path(source_path_str)
-#
-#     # Resolve relative paths (relative to session)
-#     if not source_path.is_absolute():
-#         source_path = get_session_path(session_id) / source_path
-#
-#     if not source_path.exists():
-#         raise FileNotFoundError(f"Source image file not found: {source_path}")
-#
-#     final_image_path = final_image_dir / image_id
-#     # use copy2 to preserve timestamps/metadata
-#     shutil.copy2(source_path, final_image_path)
-#
-#     # Update metadata to point to relative paths inside final dataset
-#     # metadata.processed_path = str(final_image_path)
-#     # Update metadata to point to relative path inside final dataset (so HF can resolve it)
-#     metadata.processed_path = str(Path("images") / variety / disease / image_id)
-#
-#     save_session_config(session_id, config)
-#
-#     # Write a per-image annotation sidecar in final dataset WITHOUT original_path/mask_path/status
-#     try:
-#         # produce a plain dict (compat for pydantic v2 / v1)
-#         if hasattr(metadata, "model_dump"):
-#             meta_dict = metadata.model_dump()
-#         elif hasattr(metadata, "dict"):
-#             meta_dict = metadata.dict()
-#         else:
-#             meta_dict = dict(metadata)
-#
-#         # remove unwanted fields
-#         meta_dict.pop("original_path", None)
-#         meta_dict.pop("mask_path", None)
-#         meta_dict.pop("status", None)
-#
-#         # write a per-image annotation sidecar in final dataset
-#         with open(final_annotations_dir / f"{Path(image_id).stem}.json", "w", encoding="utf-8") as f:
-#             # f.write(metadata.model_dump_json(indent=2))
-#             json.dump(meta_dict, f, indent=2, ensure_ascii=False)
-#     except Exception:
-#         # best-effort writing annotation — don't fail the labeling step if this fails
-#         pass
-#
-#     return final_base_path, final_image_path
-
-
 def move_labeled_file(session_id: str, image_id: str, variety: str, disease: str) -> Tuple[Path, Path]:
     """Moves a labeled image to its final structured directory and returns the new paths.
 
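The live move_labeled_file kept below the deleted draft is untouched by this commit; going by its docstring and the deleted version, a hedged usage sketch (every value below is a placeholder, not taken from the repo):

    from app.backend.storage import move_labeled_file  # import path assumes running from the repo root

    # Copies the labeled image into the curated dataset layout (images/<variety>/<disease>/<image_id>)
    # and returns (dataset_root, final_image_path), per the docstring of the kept implementation.
    final_base_path, final_image_path = move_labeled_file(
        session_id="session-123",   # placeholder session id
        image_id="leaf_0001.jpg",   # placeholder image filename
        variety="rama",             # placeholder variety label
        disease="leaf_spot",        # placeholder disease label
    )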