| import os |
| import json |
| import logging |
| from pathlib import Path |
| from huggingface_hub import HfApi |
| from dotenv import load_dotenv |
| from app.config.hf_config import HF_ORGANIZATION |
|
|
| |
# Directory two levels above this file, and that directory's parent.
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from the .env file located in ROOT_DIR.
load_dotenv(ROOT_DIR / ".env")

# Emit bare messages (no level or timestamp prefix) at INFO and above.
logging.basicConfig(format='%(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Fail fast at import time when no token is configured, then build the
# Hub client shared by the functions below.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)
|
|
def count_evaluated_models():
    """Count the entries stored in the ``<HF_ORGANIZATION>/contents`` dataset.

    Every ``.jsonl`` file in the dataset repository is downloaded and its
    non-blank lines are counted; each such line is treated as one model.
    A file that cannot be downloaded or read is logged and skipped, so the
    count is best-effort.

    Returns:
        dict: On success, a dict with the keys ``total_models`` (int),
        ``last_modified`` (``created_at`` of the first commit returned by
        the Hub, or ``None`` when there are no commits), ``file_count``
        (int), ``size_bytes`` (``None`` when the Hub metadata does not
        expose it) and ``downloads``. On failure, ``{"error": <message>}``.
    """
    repo_id = f"{HF_ORGANIZATION}/contents"

    try:
        # dataset_info() is already dataset-specific and does not accept a
        # ``repo_type`` keyword, so none is passed here.
        dataset_info = api.dataset_info(repo_id=repo_id)

        files = api.list_repo_files(repo_id, repo_type="dataset")

        # list_repo_commits() returns a list, not an iterator; iter() makes
        # next() work for either and still yields None for an empty history.
        commits = api.list_repo_commits(repo_id, repo_type="dataset")
        last_commit = next(iter(commits), None)

        total_entries = 0
        for file in files:
            if not file.endswith('.jsonl'):
                continue
            try:
                local_path = api.hf_hub_download(
                    repo_id=repo_id,
                    filename=file,
                    repo_type="dataset",
                )
                # Explicit encoding avoids depending on the platform default;
                # blank lines are not records and are not counted.
                with open(local_path, 'r', encoding='utf-8') as f:
                    total_entries += sum(1 for line in f if line.strip())
            except Exception as e:
                # Best-effort: one unreadable file must not abort the count.
                logger.error(f"Error processing file {file}: {str(e)}")
                continue

        return {
            "total_models": total_entries,
            "last_modified": last_commit.created_at if last_commit else None,
            "file_count": len(files),
            # NOTE(review): size_in_bytes may not be present on the returned
            # info object in every huggingface_hub version - confirm.
            "size_bytes": getattr(dataset_info, "size_in_bytes", None),
            "downloads": dataset_info.downloads,
        }

    except Exception as e:
        logger.error(f"Error counting evaluated models: {str(e)}")
        return {
            "error": str(e)
        }
|
|
def main():
    """Log a summary of the evaluated-models dataset and return the raw stats.

    Calls ``count_evaluated_models()`` and logs either the error it reports
    or the model count, file count, size, download count and last-modified
    time.

    Returns:
        dict: The dict returned by ``count_evaluated_models()``, or
        ``{"error": <message>}`` if an unexpected exception is raised here.
    """
    # Imported locally because the module never imports ``datetime``, which
    # previously made the last-modified branch fail with NameError.
    from datetime import datetime

    try:
        logger.info("\nAnalyzing evaluated models...")
        result = count_evaluated_models()

        if 'error' in result:
            logger.error(f"β Error: {result['error']}")
            return result

        logger.info(f"β {result['total_models']} models evaluated")
        logger.info(f"β {result['file_count']} files")

        # Skip the size line rather than crash when no size is available.
        size_bytes = result['size_bytes']
        if size_bytes is not None:
            logger.info(f"β {size_bytes / 1024:.1f} KB")

        logger.info(f"β {result['downloads']} downloads")

        last_modified = result['last_modified']
        if last_modified:
            # The value may already be a datetime (commit ``created_at``);
            # only ISO-8601 strings need parsing. A trailing 'Z' is rewritten
            # because older fromisoformat() versions reject it.
            if isinstance(last_modified, str):
                last_modified = datetime.fromisoformat(
                    last_modified.replace('Z', '+00:00')
                )
            logger.info(f"β Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")

        return result

    except Exception as e:
        logger.error(f"Global error: {str(e)}")
        return {"error": str(e)}
|
|
# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    main()