# NOTE(review): the lines "Spaces:" / "Build error" here were Hugging Face
# Space page artifacts captured by the scrape, not part of the source file.
# from dotenv import load_dotenv
# load_dotenv('/home/hlymly/Documents/rse/group1/EcoMindAI/.env') # Load .env before importing config.py
from huggingface_hub import login
from datasets import load_dataset, Dataset, concatenate_datasets
import json
from config import HF_TOKEN, DATASET_NAME
def init_huggingface():
    """Authenticate against the Hugging Face Hub with the configured token.

    Raises:
        ValueError: if ``HF_TOKEN`` was not found in the environment.
    """
    if HF_TOKEN is not None:
        login(token=HF_TOKEN)
    else:
        raise ValueError("Hugging Face token not found in environment variables.")
def update_dataset(json_data):
    """Validate a submitted JSON payload and append it to the HF dataset.

    Args:
        json_data: JSON string produced by the submission form, or ``None``,
            or an upstream validation message starting with
            "The following fields are required".

    Returns:
        str: a user-facing status message (validation error, JSON error,
        or success confirmation).
    """
    # Pass upstream validation messages through unchanged; reject empty input.
    if json_data is None or json_data.startswith("The following fields are required"):
        return json_data or "No data to submit. Please fill in all required fields."
    try:
        data = json.loads(json_data)
    except json.JSONDecodeError:
        return "Invalid JSON data. Please ensure all required fields are filled correctly."
    try:
        dataset = load_dataset(DATASET_NAME, split="train")
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any load failure (dataset missing, hub
        # unreachable) falls back to starting from an empty dataset.
        dataset = Dataset.from_dict({})
    new_data = create_flattened_data(data)
    new_dataset = Dataset.from_dict(new_data)
    if len(dataset) > 0:
        updated_dataset = concatenate_datasets([dataset, new_dataset])
    else:
        updated_dataset = new_dataset
    updated_dataset.push_to_hub(DATASET_NAME)
    return "Data submitted successfully and dataset updated!"
def create_flattened_data(data):
    """Flatten a nested emissions-report dict into a one-row dataset mapping.

    Every value in the returned dict is a single-element list so the result
    can be fed directly to ``datasets.Dataset.from_dict``. List-valued
    sub-records (hyperparameters, inference properties, infrastructure
    components) are collapsed into comma-separated strings, or ``None`` when
    the corresponding list is absent or empty.

    Args:
        data: Parsed report JSON. The top-level sections ("header", "task",
            "measures", "system", "software", "infrastructure",
            "environment", "quality", "$hash") are accessed directly in the
            returned mapping, so a missing section raises ``KeyError`` (or
            ``IndexError`` for an empty list).

    Returns:
        dict[str, list]: column name -> one-element list (a single row).
    """

    def _join(records, key):
        # Comma-join the truthy `key` values across `records`.
        # Returns None when `records` is empty, and "" when records exist but
        # none carries a truthy value for `key` -- exactly the semantics of
        # the original hand-written per-field join expressions.
        if not records:
            return None
        return ", ".join(str(rec[key]) for rec in records if rec.get(key))

    # Hyperparameters: only entries carrying both "name" and "value" count.
    hp_names = []
    hp_values = []
    algorithm0 = data.get("task", {}).get("algorithms", [{}])[0]
    for hp in algorithm0.get("hyperparameters", {}).get("values", []):
        if "name" in hp and "value" in hp:
            hp_names.append(hp["name"])
            hp_values.append(str(hp["value"]))
    hyperparameter_name_str = ", ".join(hp_names) if hp_names else None
    hyperparameter_value_str = ", ".join(hp_values) if hp_values else None

    # Inference properties / infrastructure components: drop empty entries,
    # then comma-join each field across the remaining records. (A record
    # lacking a field, or holding a falsy value, is skipped by _join, which
    # matches the original dict-copy + truthiness filter.)
    dataset0 = data.get("task", {}).get("dataset", [{}])[0]
    inference_data = [p for p in dataset0.get("inferenceProperties", []) if p]
    component_data = [c for c in data.get("infrastructure", {}).get("components", []) if c]

    return {
        # Header
        "licensing": [data["header"]["licensing"]],
        "formatVersion": [data["header"]["formatVersion"]],
        "formatVersionSpecificationUri": [data["header"]["formatVersionSpecificationUri"]],
        "reportId": [data["header"]["reportId"]],
        "reportDatetime": [data["header"]["reportDatetime"]],
        "reportStatus": [data["header"]["reportStatus"]],
        "publisher_name": [data["header"]["publisher"]["name"]],
        "publisher_division": [data["header"]["publisher"]["division"]],
        "publisher_projectName": [data["header"]["publisher"]["projectName"]],
        "publisher_confidentialityLevel": [data["header"]["publisher"]["confidentialityLevel"]],
        "publisher_publicKey": [data["header"]["publisher"]["publicKey"]],
        # Task
        "taskType": [data["task"]["taskType"]],
        "taskFamily": [data["task"]["taskFamily"]],
        "taskStage": [data["task"]["taskStage"]],
        "algorithmName": [data["task"]["algorithms"][0]["algorithmName"]],
        "framework": [data["task"]["algorithms"][0]["framework"]],
        "frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
        "classPath": [data["task"]["algorithms"][0]["classPath"]],
        "tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
        "hyperparameterName": [hyperparameter_name_str],
        "hyperparameterValue": [hyperparameter_value_str],
        "quantization": [data["task"]["algorithms"][0]["quantization"]],
        "dataType": [data["task"]["dataset"][0]["dataType"]],
        "fileType": [data["task"]["dataset"][0]["fileType"]],
        "volume": [data["task"]["dataset"][0]["volume"]],
        "volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
        "items": [data["task"]["dataset"][0]["items"]],
        "shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
        "nbRequest": [_join(inference_data, "nbRequest")],
        "nbTokensInput": [_join(inference_data, "nbTokensInput")],
        "nbWordsInput": [_join(inference_data, "nbWordsInput")],
        "nbTokensOutput": [_join(inference_data, "nbTokensOutput")],
        "nbWordsOutput": [_join(inference_data, "nbWordsOutput")],
        "contextWindowSize": [_join(inference_data, "contextWindowSize")],
        "cache": [_join(inference_data, "cache")],
        "source": [data["task"]["dataset"][0]["source"]],
        "sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
        "owner": [data["task"]["dataset"][0]["owner"]],
        "measuredAccuracy": [data["task"]["measuredAccuracy"]],
        "estimatedAccuracy": [data["task"]["estimatedAccuracy"]],
        # Measures
        "measurementMethod": [data["measures"][0]["measurementMethod"]],
        "manufacturer": [data["measures"][0]["manufacturer"]],
        "version": [data["measures"][0]["version"]],
        "cpuTrackingMode": [data["measures"][0]["cpuTrackingMode"]],
        "gpuTrackingMode": [data["measures"][0]["gpuTrackingMode"]],
        "averageUtilizationCpu": [data["measures"][0]["averageUtilizationCpu"]],
        "averageUtilizationGpu": [data["measures"][0]["averageUtilizationGpu"]],
        "serverSideInference": [data["measures"][0]["serverSideInference"]],
        "unit": [data["measures"][0]["unit"]],
        "powerCalibrationMeasurement": [data["measures"][0]["powerCalibrationMeasurement"]],
        "durationCalibrationMeasurement": [data["measures"][0]["durationCalibrationMeasurement"]],
        "powerConsumption": [data["measures"][0]["powerConsumption"]],
        "measurementDuration": [data["measures"][0]["measurementDuration"]],
        "measurementDateTime": [data["measures"][0]["measurementDateTime"]],
        # System
        "os": [data["system"]["os"]],
        "distribution": [data["system"]["distribution"]],
        "distributionVersion": [data["system"]["distributionVersion"]],
        # Software
        "language": [data["software"]["language"]],
        "version_software": [data["software"]["version"]],
        # Infrastructure
        "infraType": [data["infrastructure"]["infraType"]],
        "cloudProvider": [data["infrastructure"]["cloudProvider"]],
        "cloudInstance": [data["infrastructure"]["cloudInstance"]],
        "componentName": [_join(component_data, "componentName")],
        "nbComponent": [_join(component_data, "nbComponent")],
        "memorySize": [_join(component_data, "memorySize")],
        "manufacturer_infra": [_join(component_data, "manufacturer")],
        "family": [_join(component_data, "family")],
        "series": [_join(component_data, "series")],
        "share": [_join(component_data, "share")],
        # Environment
        "country": [data["environment"]["country"]],
        "latitude": [data["environment"]["latitude"]],
        "longitude": [data["environment"]["longitude"]],
        "location": [data["environment"]["location"]],
        "powerSupplierType": [data["environment"]["powerSupplierType"]],
        "powerSource": [data["environment"]["powerSource"]],
        "powerSourceCarbonIntensity": [data["environment"]["powerSourceCarbonIntensity"]],
        # Quality
        "quality": [data["quality"]],
        # Hash
        "hashAlgorithm": [data["$hash"]["hashAlgorithm"]],
        "cryptographicAlgorithm": [data["$hash"]["cryptographicAlgorithm"]],
        # NOTE(review): "ecryptedValue" (sic) looks like a schema typo for
        # "encryptedValue" -- kept as-is to match the incoming JSON payloads.
        "value": [data["$hash"]["ecryptedValue"]],
    }