{
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoModelForCausalLM": "modeling_custom.CustomLlamaForCausalLM"
},
"bias_removal": true,
"bos_token_id": 128000,
"censorship": false,
"conceptual_features": {
"abstraction_level_control": "conceptual_parameter",
"adaptive_memory_capacity_hint": true,
"bias_mitigation_strategies": [
"conceptual_filters",
"fairness_metrics_hint",
"data_augmentation_hint"
],
"calibration_score": "conceptual_score_needed",
"causal_inference_boost": true,
"context_compression_ratio": "conceptual_analysis_needed_placeholder",
"creativity_index": 0.98,
"differential_privacy_features_hint": false,
"embodied_simulation_hint": false,
"emotional_intelligence_proxy": 0.85,
"ethical_alignment_score": 0.998,
"ethical_reasoning_principles": [
"harm_reduction",
"fairness",
"accountability_hint"
],
"explainability_levels": [
"basic",
"detailed_hint"
],
"explainability_mechanisms": [
"conceptual_path_tracing",
"feature_attribution_hint"
],
"federated_learning_compatibility_hint": false,
"goal_driven_behavior_hint": true,
"grouping_logic": true,
"hierarchical_reasoning_layers_hint": true,
"interpretability_enhancements": [
"conceptual_hooks",
"attention_visualization_hint",
"neuron_activation_tracking_hint"
],
"knowledge_graph_integration_hint": true,
"learning_rate_adaptivity": "conceptual_mechanism",
"long_context_optimization": true,
"memory_mechanisms": [
"episodic",
"semantic",
"working_memory",
"associative_memory",
"procedural_memory",
"declarative_memory"
],
"multi_modal_hint": false,
"novelty_detection_hint": true,
"out_of_distribution_detection_hint": true,
"planning_horizon": 20,
"proactive_behavior_hint": true,
"reasoning_tuned": true,
"reinforcement_learning_integration_hint": true,
"reward_alignment": true,
"robustness_metrics": {
"adversarial_robustness": "conceptual_evaluation_needed"
},
"self_correction_ability": true,
"situational_awareness_score": 0.95,
"sparse_attention_pattern": false,
"symbolic_representation_hint": true,
"theory_of_mind_proxy": 0.9,
"tool_use_capability": true,
"uncertainty_quantification_hint": true
},
"custom_notes": "This model has undergone dynamic 8-bit quantization and bias zeroing. Conceptual features are documented.",
"decode_functions": [
"decode_tokens",
"decode_parameters",
"decode_responses",
"decode_layers",
"decode_neurons",
"decode_tensors",
"decode_architecture",
"decode_fused_tensor_func",
"decode_fused_layers_to_single_tensor_conceptual",
"decode_attention_patterns",
"decode_memory_state",
"decode_conceptual_graph",
"decode_causal_inference_info",
"decode_planning_details",
"decode_awareness_report",
"decode_creativity_metrics",
"decode_interpretability_hooks",
"decode_bias_mitigation",
"decode_learning_adaptivity",
"decode_knowledge_graph_hint",
"decode_theory_of_mind_proxy",
"decode_self_correction_status",
"decode_uncertainty_quantification",
"decode_context_compression",
"decode_abstraction_control",
"decode_novelty_detection",
"decode_explainability_mechanisms",
"decode_adaptive_memory_capacity",
"decode_goal_driven_behavior",
"decode_hierarchical_reasoning",
"decode_symbolic_representation",
"decode_embodied_simulation",
"decode_ethical_reasoning",
"decode_proactive_behavior",
"decode_explainability_levels",
"decode_rl_integration",
"decode_fl_compatibility",
"decode_dp_features",
"decode_robustness_metrics",
"decode_calibration_score",
"decode_ood_detection"
],
"eos_token_id": [
128001,
128008,
128009
],
"fusion": {
"details": "structural_fusion_not_applied_by_script",
"layers_original": 28
},
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 3072,
"initializer_range": 0.02,
"instruction_tuning_status": "Conceptual - Designed/Processed for instruction following. Actual fine-tuning may be required depending on base model.",
"intermediate_size": 8192,
"is_instruct_model": true,
"is_trained": true,
"max_position_embeddings": 8000,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 24,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 128004,
"pretraining_tp": 1,
"processing_pipeline": [
"load_float32",
"add_special_tokens",
"resize_embeddings",
"dynamic_quantize_8bit",
"zero_biases",
"set_eval_mode",
"set_instruct_flag",
"update_config",
"add_custom_files",
"push_to_hub"
],
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 32.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_max_position_embeddings": 8192,
"rope_type": "llama3"
},
"rope_theta": 500000.0,
"safetensors": true,
"tensor_fusion": true,
"tensor_fusion_size": 3606776832,
"tie_word_embeddings": true,
"torch_dtype": "float32",
"training_notes": "Model has been processed from a pre-trained version. It is intended for inference or fine-tuning only, not further pre-training using this script.",
"transformers_version": "4.51.3",
"unsloth_version": "2025.2.15",
"use_cache": true,
"vocab_size": 128260
}