# status-law-gbot / config / settings.py
# Commit e6ceacc (Rulga): refactor settings.py — replace the Phi-2 model
# configuration with Neural Mistral 7B, enhancing reasoning and
# instruction-following capabilities.
import os
import tempfile
import logging
from huggingface_hub import HfApi
from dotenv import load_dotenv
# Pull variables from a local .env file into os.environ (no-op if absent).
load_dotenv()

# Module-wide logger; basicConfig is a no-op when the host application has
# already configured the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Hugging Face API token — mandatory; the app cannot run without it.
HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HF_TOKEN:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")

# All HF user tokens start with the "hf_" prefix; reject anything else early
# rather than failing later with an opaque 401 from the API.
if not HF_TOKEN.startswith('hf_'):
    raise ValueError("Invalid Hugging Face token format")

# FIX: use the logger configured above instead of a bare print(), and lazy
# %-formatting.  Only a short prefix of the token is emitted so the secret
# never lands in logs in full.
logger.info("Token loaded successfully: %s...", HF_TOKEN[:5])
# Shared HTTP configuration for every call to the HF Inference API.
API_CONFIG = {
    "inference_endpoint": "https://api-inference.huggingface.co",
    "token": HF_TOKEN,
    "is_paid_tier": False,  # force free-tier mode
    "timeout": 15,          # seconds per request — kept short for free tier
    "max_retries": 1,
    "headers": {
        "X-Use-Cache": "true",  # enable response caching on the free tier
        "Content-Type": "application/json",
        "Authorization": f"Bearer {HF_TOKEN}"
    }
}
def check_account_type():
    """Report the Hugging Face account tier for this deployment.

    The application is pinned to the free tier, so no API call is made and
    the answer is constant.

    Returns:
        tuple: (is_pro: bool, account_type: str) — always (False, "free").
    """
    is_pro = False
    account_type = "free"
    return is_pro, account_type
# Baseline free-tier settings (kept in sync with check_account_type()).
IS_PRO_ACCOUNT = False
ACCOUNT_TYPE = "free"
# Default free model; note the original file re-assigns this (to the same
# value) again after the MODELS table.
DEFAULT_MODEL = "zephyr-7b"
# Dataset configuration: the HF dataset repo backing all persistence.
DATASET_ID = "Rulga/status-law-knowledge-base"

# Folder layout inside the dataset repo.  These are relative paths —
# presumably resolved against DATASET_ID by the upload/download helpers;
# confirm against the callers.
DATASET_CHAT_HISTORY_PATH = "chat_history"
DATASET_VECTOR_STORE_PATH = "vector_store"
DATASET_FINE_TUNED_PATH = "fine_tuned_models"
DATASET_ANNOTATIONS_PATH = "annotations"
DATASET_ERROR_LOGS_PATH = "error_logs"
DATASET_PREFERENCES_PATH = "preferences/user_preferences.json"

# Training artefact locations inside the same dataset repo.
DATASET_TRAINING_DATA_PATH = "training_data"
DATASET_TRAINING_LOGS_PATH = "training_logs"
# Temporary storage — everything lives under the system temp directory so
# restarts of the (ephemeral) host start from a clean slate.
TEMP_DIR = tempfile.gettempdir()
TEMP_ROOT = os.path.join(TEMP_DIR, "status_law_kb")
CHAT_HISTORY_PATH = os.path.join(TEMP_ROOT, "chat_history")
VECTOR_STORE_PATH = os.path.join(TEMP_ROOT, "vector_store")
FINE_TUNED_PATH = os.path.join(TEMP_ROOT, "fine_tuned_models")
MODELS_REGISTRY_PATH = os.path.join(TEMP_ROOT, "models_registry.json")

# Create the temporary directories up front (idempotent).
for path in [CHAT_HISTORY_PATH, VECTOR_STORE_PATH, FINE_TUNED_PATH]:
    os.makedirs(path, exist_ok=True)

# Paths configuration
# MODEL_PATH is <project root>/models (two levels up from this config file).
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")

# BUG FIX: the original wrote os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH)
# — joining two ABSOLUTE paths.  os.path.join discards everything before a
# second absolute component, so the result was silently just FINE_TUNED_PATH.
# State that intent directly instead of relying on the discard behaviour.
TRAINING_OUTPUT_DIR = FINE_TUNED_PATH

# Create the remaining directories if they don't exist.
os.makedirs(MODEL_PATH, exist_ok=True)
os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
# Models configuration with detailed information.
# Each entry: HF repo "id", display metadata, shared generation "parameters",
# LoRA "training" settings, and human-readable "details".
# NOTE(review): the "llama-7b" key actually maps to TinyLlama 1.1B — the key
# name is misleading but is kept because callers may reference it.
MODELS = {
    "zephyr-7b": {
        "id": "HuggingFaceH4/zephyr-7b-beta",
        "name": "Zephyr 7B",
        "description": "A state-of-the-art 7B parameter language model",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "HuggingFaceH4/zephyr-7b-beta",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "zephyr-7b-beta-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "HuggingFaceH4 Zephyr 7B Beta",
            "capabilities": [
                "High performance on instruction-following tasks",
                "Good response accuracy",
                "Advanced reasoning capabilities",
                "Excellent text generation quality"
            ],
            "limitations": [
                "May require paid API for usage",
                "Limited support for languages other than English",
                "Less optimization for legal topics compared to specialized models"
            ],
            "use_cases": [
                "Complex legal reasoning",
                "Case analysis",
                "Legal research",
                "Structured legal text generation"
            ],
            "documentation": "https://huggingface.co/HuggingFaceH4/zephyr-7b-beta"
        }
    },
    "llama-7b": {
        "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "name": "TinyLlama 1.1B Chat",
        "description": "Lightweight chat model with excellent performance for its size",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "tinyllama-1.1b-chat-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "TinyLlama 1.1B Chat v1.0",
            "capabilities": [
                "Efficient resource usage",
                "Fast inference speed",
                "Good for basic chat interactions",
                "Can run on CPU",
                "Works on devices with limited RAM",
                "Open source and free to use"
            ],
            "limitations": [
                "Lower capacity compared to larger models",
                "May struggle with complex reasoning",
                "Limited context window",
                "Less specialized knowledge"
            ],
            "use_cases": [
                "Quick legal queries",
                "Basic document analysis",
                "Resource-constrained environments",
                "Mobile applications",
                "Edge devices"
            ],
            "documentation": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        }
    },
    "mixtral-8x7b": {
        "id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "name": "Mixtral 8x7B Instruct",
        "description": "Powerful mixture-of-experts model with strong multilingual capabilities",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "mixtral-8x7b-instruct-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "Mixtral 8x7B Instruct v0.1",
            "capabilities": [
                "Excellent multilingual support",
                "Strong reasoning capabilities",
                "Superior instruction following",
                "High-quality text generation"
            ],
            "limitations": [
                "Larger model size",
                "May need domain-specific prompting",
                "Higher resource requirements"
            ],
            "use_cases": [
                "Complex legal analysis",
                "Multilingual consultation",
                "Advanced document processing",
                "Cross-lingual communication"
            ],
            "documentation": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1"
        }
    },
    "neural-mistral": {  # replaces the former phi-2 entry
        # NOTE(review): could not verify that "teknium/Neural-Mistral-7B-v0.1"
        # exists on the HF Hub — confirm the repo id before relying on it.
        "id": "teknium/Neural-Mistral-7B-v0.1",
        "name": "Neural Mistral 7B",
        "description": "Enhanced version of Mistral with improved reasoning and instruction following",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "teknium/Neural-Mistral-7B-v0.1",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "neural-mistral-7b-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "Neural Mistral 7B v0.1",
            "capabilities": [
                "Enhanced reasoning capabilities",
                "Improved instruction following",
                "Strong multilingual support",
                "Better context understanding",
                "Advanced problem-solving abilities",
                "Consistent output quality"
            ],
            "limitations": [
                "Requires more GPU memory",
                "May be slower than smaller models",
                "Resource intensive for fine-tuning"
            ],
            "use_cases": [
                "Complex legal analysis",
                "Advanced reasoning tasks",
                "Detailed document processing",
                "Professional consultation",
                "Research assistance"
            ],
            "documentation": "https://huggingface.co/teknium/Neural-Mistral-7B-v0.1"
        }
    }
}
# Every model is served through the same HF inference endpoint; stamp it onto
# each entry so callers can read it from the model config alone.
for model in MODELS.values():
    model["endpoint"] = API_CONFIG["inference_endpoint"]

# Default model selection — deliberately "zephyr-7b" rather than "llama-7b".
DEFAULT_MODEL = "zephyr-7b"
ACTIVE_MODEL = MODELS[DEFAULT_MODEL]
# Embedding model used when building the vector store.
EMBEDDING_MODEL = "intfloat/multilingual-e5-large"

# User-Agent header value for outgoing HTTP requests.
USER_AGENT = "Status-Law-Assistant/1.0"
# Rating dimensions for answer annotation.  Keys are the stored field names;
# values are Russian labels (presumably shown in the annotation UI — confirm
# against the front-end code).  Values are runtime strings: do not translate.
RATING_FIELDS = {
    "accuracy": "Точность ответа",                    # "Answer accuracy"
    "completeness": "Полнота информации",             # "Completeness of information"
    "relevance": "Релевантность ответу",              # "Relevance to the answer"
    "clarity": "Ясность изложения",                   # "Clarity of presentation"
    "legal_correctness": "Юридическая корректность"   # "Legal correctness"
}
# Shape of one persisted conversation.  Values are the *types* each field
# must have (a documentation-by-structure schema, not a validator).
CHAT_HISTORY_SCHEMA = {
    "conversation_id": str,
    "timestamp": str,  # ISO format
    "history": [       # one dict per turn, in chronological order
        {
            "role": str,      # "user" or "assistant"
            "content": str,
            "timestamp": str  # ISO format
        }
    ]
}
# Shape of one annotation record; same convention as CHAT_HISTORY_SCHEMA
# (values are expected types).  "ratings" has one int per RATING_FIELDS key.
ANNOTATION_SCHEMA = {
    "conversation_id": str,
    "timestamp": str,
    "question": str,
    "original_answer": str,
    "improved_answer": str,
    "ratings": {field: int for field in RATING_FIELDS},  # all ratings 1-5
    "notes": str
}