Subh775 committed on
Commit
d0c0011
·
1 Parent(s): 60d4c0d

final space code ready

Browse files
Files changed (3) hide show
  1. app/backend/hub.py +110 -24
  2. app/backend/main.py +21 -0
  3. app/backend/storage.py +0 -69
app/backend/hub.py CHANGED
@@ -1,13 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /app/backend/hub.py
2
  import os
3
  import logging
4
  from typing import Optional, Dict
5
  from pathlib import Path
6
 
7
- from huggingface_hub import HfApi, upload_folder, Repository, create_repo
 
 
 
 
8
 
 
9
  logger = logging.getLogger(__name__)
10
- logger.addHandler(logging.NullHandler())
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
13
  """Initializes HfApi using a provided token or the environment secret."""
@@ -16,48 +96,54 @@ def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
16
  return HfApi(token=token_to_use)
17
  return None
18
 
 
 
 
 
 
 
 
 
 
 
 
19
  def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
20
- """Fetches user info from the Hub (returns None if unauthenticated)."""
21
  api = get_hf_api(token)
22
- if api:
23
- try:
24
- return api.whoami()
25
- except Exception as e:
26
- # print(f"Failed to authenticate with Hugging Face Hub: {e}")
27
- logger.warning("Failed to authenticate with Hugging Face Hub: %s", e)
28
- return None
29
- return None
30
-
31
 
32
- def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: Optional[str] = None):
33
  """
34
- Push the contents of folder_path to the Hugging Face datasets repo `namespace/repo_name`.
35
- Uses token param if provided, otherwise falls back to HF_TOKEN from environment/.env.
36
  """
37
- api = get_hf_api(token)
38
- if not api:
39
- raise ConnectionError("Hugging Face token not found. Provide token or set HF_TOKEN in environment or .env file.")
40
-
41
  repo_id = f"{namespace}/{repo_name}"
42
 
43
  # create repo if not exists
44
  try:
45
- api.create_repo(name=repo_name, token=token or os.getenv("HF_TOKEN"), repo_type="dataset", private=private, namespace=namespace)
46
- logger.info(f"Created repo {repo_id} (or it already exists)")
47
  except Exception as e:
48
  logger.info(f"Repo create warning (may already exist): {e}")
49
 
50
- # upload entire folder (recursively) using upload_folder helper
51
  try:
52
  upload_folder(
53
  folder_path=folder_path,
54
  path_in_repo="",
55
  repo_id=repo_id,
56
- token=token or os.getenv("HF_TOKEN"),
57
  repo_type="dataset",
58
  commit_message=commit_message,
59
  )
60
  logger.info(f"Uploaded folder to {repo_id} from {folder_path}")
61
  except Exception as e:
62
  logger.exception(f"upload_folder failed for {folder_path} -> {repo_id}: {e}")
63
- raise
 
1
+ # # /app/backend/hub.py
2
+ # import os
3
+ # import logging
4
+ # from typing import Optional, Dict
5
+ # from pathlib import Path
6
+
7
+ # from huggingface_hub import HfApi, upload_folder, Repository, create_repo
8
+
9
+ # logger = logging.getLogger(__name__)
10
+ # logger.addHandler(logging.NullHandler())
11
+
12
+ # def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
13
+ # """Initializes HfApi using a provided token or the environment secret."""
14
+ # token_to_use = token or os.getenv("HF_TOKEN")
15
+ # if token_to_use:
16
+ # return HfApi(token=token_to_use)
17
+ # return None
18
+
19
+ # def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
20
+ # """Fetches user info from the Hub (returns None if unauthenticated)."""
21
+ # api = get_hf_api(token)
22
+ # if api:
23
+ # try:
24
+ # return api.whoami()
25
+ # except Exception as e:
26
+ # # print(f"Failed to authenticate with Hugging Face Hub: {e}")
27
+ # logger.warning("Failed to authenticate with Hugging Face Hub: %s", e)
28
+ # return None
29
+ # return None
30
+
31
+
32
+ # def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: Optional[str] = None):
33
+ # """
34
+ # Push the contents of folder_path to the Hugging Face datasets repo `namespace/repo_name`.
35
+ # Uses token param if provided, otherwise falls back to HF_TOKEN from environment/.env.
36
+ # """
37
+ # api = get_hf_api(token)
38
+ # if not api:
39
+ # raise ConnectionError("Hugging Face token not found. Provide token or set HF_TOKEN in environment or .env file.")
40
+
41
+ # repo_id = f"{namespace}/{repo_name}"
42
+
43
+ # # create repo if not exists
44
+ # try:
45
+ # api.create_repo(name=repo_name, token=token or os.getenv("HF_TOKEN"), repo_type="dataset", private=private, namespace=namespace)
46
+ # logger.info(f"Created repo {repo_id} (or it already exists)")
47
+ # except Exception as e:
48
+ # logger.info(f"Repo create warning (may already exist): {e}")
49
+
50
+ # # upload entire folder (recursively) using upload_folder helper
51
+ # try:
52
+ # upload_folder(
53
+ # folder_path=folder_path,
54
+ # path_in_repo="",
55
+ # repo_id=repo_id,
56
+ # token=token or os.getenv("HF_TOKEN"),
57
+ # repo_type="dataset",
58
+ # commit_message=commit_message,
59
+ # )
60
+ # logger.info(f"Uploaded folder to {repo_id} from {folder_path}")
61
+ # except Exception as e:
62
+ # logger.exception(f"upload_folder failed for {folder_path} -> {repo_id}: {e}")
63
+ # raise
64
+
65
  # /app/backend/hub.py
66
  import os
67
  import logging
68
  from typing import Optional, Dict
69
  from pathlib import Path
70
 
71
+ # third-party
72
+ from huggingface_hub import HfApi, upload_folder, create_repo
73
+
74
+ # dotenv helper to load .env when running locally
75
+ from dotenv import load_dotenv
76
 
77
+ # explicit logger for this module
78
  logger = logging.getLogger(__name__)
79
+
80
+ # Attempt to load .env from the project root (two levels up: app/backend -> project root)
81
+ try:
82
+ env_path = Path(__file__).resolve().parents[2] / ".env"
83
+ if env_path.exists():
84
+ load_dotenv(dotenv_path=env_path)
85
+ logger.info(f"Loaded .env from {env_path}")
86
+ else:
87
+ # Try default load (cwd) as a fallback
88
+ load_dotenv()
89
+ except Exception as e:
90
+ logger.debug(f"Could not load .env automatically: {e}")
91
 
92
  def get_hf_api(token: Optional[str] = None) -> Optional[HfApi]:
93
  """Initializes HfApi using a provided token or the environment secret."""
 
96
  return HfApi(token=token_to_use)
97
  return None
98
 
99
+ # def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
100
+ # """Fetches user info from the Hub."""
101
+ # api = get_hf_api(token)
102
+ # if api:
103
+ # try:
104
+ # return api.whoami()
105
+ # except Exception as e:
106
+ # logger.warning(f"Failed to authenticate with Hugging Face Hub: {e}")
107
+ # return None
108
+ # return None
109
+
110
  def get_user_info(token: Optional[str] = None) -> Optional[Dict]:
 
111
  api = get_hf_api(token)
112
+ if not api:
113
+ logger.warning("Hugging Face authentication not available. Please check HF_TOKEN.")
114
+ return None
115
+ try:
116
+ return api.whoami()
117
+ except Exception as e:
118
+ logger.warning(f"Failed to authenticate with Hugging Face Hub: {e}")
119
+ return None
 
120
 
121
+ def push_dataset_to_hub(folder_path: str, repo_name: str, namespace: str, private: bool, commit_message: str, token: str):
122
  """
123
+ Push the folder at folder_path to the Hugging Face datasets repo `namespace/repo_name`.
124
+ This uploads everything in folder_path (including README.md, manifest.csv, images/, annotations/).
125
  """
126
+ api = HfApi(token=token)
 
 
 
127
  repo_id = f"{namespace}/{repo_name}"
128
 
129
  # create repo if not exists
130
  try:
131
+ api.create_repo(name=repo_name, token=token, repo_type="dataset", private=private, namespace=namespace)
132
+ logger.info(f"Created repo {repo_id}")
133
  except Exception as e:
134
  logger.info(f"Repo create warning (may already exist): {e}")
135
 
136
+ # upload entire folder (recursively) using upload_folder
137
  try:
138
  upload_folder(
139
  folder_path=folder_path,
140
  path_in_repo="",
141
  repo_id=repo_id,
142
+ token=token,
143
  repo_type="dataset",
144
  commit_message=commit_message,
145
  )
146
  logger.info(f"Uploaded folder to {repo_id} from {folder_path}")
147
  except Exception as e:
148
  logger.exception(f"upload_folder failed for {folder_path} -> {repo_id}: {e}")
149
+ raise
app/backend/main.py CHANGED
@@ -17,6 +17,27 @@ from fastapi.middleware.cors import CORSMiddleware
17
  from starlette.staticfiles import StaticFiles
18
  from fastapi.exceptions import RequestValidationError
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  from .schemas import *
21
  from .storage import (
22
  create_new_session, get_session_path, save_session_config,
 
17
  from starlette.staticfiles import StaticFiles
18
  from fastapi.exceptions import RequestValidationError
19
 
20
+
21
+ from pathlib import Path
22
+ from dotenv import load_dotenv
23
+ import os
24
+ import logging
25
+
26
+ # Attempt to load .env from repo root (two levels up: app/backend -> project root)
27
+ env_path = Path(__file__).resolve().parents[2] / ".env"
28
+ if env_path.exists():
29
+ load_dotenv(dotenv_path=env_path)
30
+ logging.getLogger(__name__).info(f"Loaded .env from {env_path}; HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
31
+ else:
32
+ # fallback to default load (cwd)
33
+ load_dotenv()
34
+ logging.getLogger(__name__).info(f"Loaded .env from cwd; HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
35
+
36
+ logger = logging.getLogger(__name__)
37
+ logger.setLevel(logging.INFO)
38
+ logger.info(f"ENV HF_TOKEN present? {'yes' if os.getenv('HF_TOKEN') else 'no'}")
39
+
40
+
41
  from .schemas import *
42
  from .storage import (
43
  create_new_session, get_session_path, save_session_config,
app/backend/storage.py CHANGED
@@ -79,75 +79,6 @@ def extract_zip(zip_file_path: Path, extract_to: Path) -> List[str]:
79
  image_filenames.append(sanitized_id)
80
  return image_filenames
81
 
82
- # def move_labeled_file(session_id: str, image_id: str, variety: str, disease: str) -> Tuple[Path, Path]:
83
- # """Moves a labeled image to its final structured directory and returns the new paths.
84
- #
85
- # NOTE: this function updates metadata.processed_path to a RELATIVE path inside the final dataset
86
- # (images/<variety>/<disease>/<image_id>) and writes a per-image annotation JSON that excludes
87
- # original_path, mask_path and status (as requested).
88
- # """
89
- # config = load_session_config(session_id)
90
- # if image_id not in config.image_metadata:
91
- # raise FileNotFoundError(f"Image id {image_id} not found in session config")
92
- # metadata = config.image_metadata[image_id]
93
- #
94
- # final_dataset_slug = "tulasi-curated-dataset"
95
- # final_base_path = OUTPUT_DIR / final_dataset_slug
96
- # final_image_dir = final_base_path / "images" / variety / disease
97
- # final_mask_dir = final_base_path / "masks"
98
- # final_annotations_dir = final_base_path / "annotations"
99
- #
100
- # final_image_dir.mkdir(parents=True, exist_ok=True)
101
- # final_annotations_dir.mkdir(parents=True, exist_ok=True)
102
- #
103
- # Prefer processed image if present, otherwise fall back to original
104
- # source_path_str = metadata.processed_path or metadata.original_path
105
- # source_path = Path(source_path_str)
106
- #
107
- # Resolve relative paths (relative to session)
108
- # if not source_path.is_absolute():
109
- # source_path = get_session_path(session_id) / source_path
110
- #
111
- # if not source_path.exists():
112
- # raise FileNotFoundError(f"Source image file not found: {source_path}")
113
- #
114
- # final_image_path = final_image_dir / image_id
115
- # use copy2 to preserve timestamps/metadata
116
- # shutil.copy2(source_path, final_image_path)
117
- #
118
- # Update metadata to point to relative paths inside final dataset
119
- # metadata.processed_path = str(final_image_path)
120
- # Update metadata to point to relative path inside final dataset (so HF can resolve it)
121
- # metadata.processed_path = str(Path("images") / variety / disease / image_id)
122
- #
123
- # save_session_config(session_id, config)
124
- #
125
- # Write a per-image annotation sidecar in final dataset WITHOUT original_path/mask_path/status
126
- # try:
127
- # produce a plain dict (compat for pydantic v2 / v1)
128
- # if hasattr(metadata, "model_dump"):
129
- # meta_dict = metadata.model_dump()
130
- # elif hasattr(metadata, "dict"):
131
- # meta_dict = metadata.dict()
132
- # else:
133
- # meta_dict = dict(metadata)
134
- #
135
- # remove unwanted fields
136
- # meta_dict.pop("original_path", None)
137
- # meta_dict.pop("mask_path", None)
138
- # meta_dict.pop("status", None)
139
- #
140
- # write a per-image annotation sidecar in final dataset
141
- # with open(final_annotations_dir / f"{Path(image_id).stem}.json", "w", encoding="utf-8") as f:
142
- # f.write(metadata.model_dump_json(indent=2))
143
- # json.dump(meta_dict, f, indent=2, ensure_ascii=False)
144
- # except Exception:
145
- # best-effort writing annotation — don't fail the labeling step if this fails
146
- # pass
147
- #
148
- # return final_base_path, final_image_path
149
-
150
-
151
  def move_labeled_file(session_id: str, image_id: str, variety: str, disease: str) -> Tuple[Path, Path]:
152
  """Moves a labeled image to its final structured directory and returns the new paths.
153
 
 
79
  image_filenames.append(sanitized_id)
80
  return image_filenames
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def move_labeled_file(session_id: str, image_id: str, variety: str, disease: str) -> Tuple[Path, Path]:
83
  """Moves a labeled image to its final structured directory and returns the new paths.
84