# status-law-gbot / config / settings.py
# Commit e6ceacc (Rulga): refactor settings.py — replace the Phi-2 model
# configuration with Neural Mistral 7B, enhancing reasoning and
# instruction-following capabilities.
import os
import tempfile
import logging
from huggingface_hub import HfApi
from dotenv import load_dotenv
# Pull variables from a local .env file into os.environ (no-op if absent).
load_dotenv()

# Module-wide logger; basicConfig is a no-op when the host application has
# already configured the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Hugging Face API token — mandatory; the app cannot run without it.
HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HF_TOKEN:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")

# All HF user tokens start with the "hf_" prefix; reject anything else early
# rather than failing later with an opaque 401 from the API.
if not HF_TOKEN.startswith('hf_'):
    raise ValueError("Invalid Hugging Face token format")

# FIX: use the logger configured above instead of a bare print(), and lazy
# %-formatting.  Only a short prefix of the token is emitted so the secret
# never lands in logs in full.
logger.info("Token loaded successfully: %s...", HF_TOKEN[:5])
# Shared HTTP configuration for every call to the HF Inference API.
API_CONFIG = {
    "inference_endpoint": "https://api-inference.huggingface.co",
    "token": HF_TOKEN,
    "is_paid_tier": False,  # force free-tier mode
    "timeout": 15,          # seconds per request — kept short for free tier
    "max_retries": 1,
    "headers": {
        "X-Use-Cache": "true",  # enable response caching on the free tier
        "Content-Type": "application/json",
        "Authorization": f"Bearer {HF_TOKEN}"
    }
}
def check_account_type():
    """Report the Hugging Face account tier for this deployment.

    The application is pinned to the free tier, so no API call is made and
    the answer is constant.

    Returns:
        tuple: (is_pro: bool, account_type: str) — always (False, "free").
    """
    is_pro = False
    account_type = "free"
    return is_pro, account_type
# Baseline free-tier settings (kept in sync with check_account_type()).
IS_PRO_ACCOUNT = False
ACCOUNT_TYPE = "free"
# Default free model; note the original file re-assigns this (to the same
# value) again after the MODELS table.
DEFAULT_MODEL = "zephyr-7b"
# Dataset configuration: the HF dataset repo backing all persistence.
DATASET_ID = "Rulga/status-law-knowledge-base"

# Folder layout inside the dataset repo.  These are relative paths —
# presumably resolved against DATASET_ID by the upload/download helpers;
# confirm against the callers.
DATASET_CHAT_HISTORY_PATH = "chat_history"
DATASET_VECTOR_STORE_PATH = "vector_store"
DATASET_FINE_TUNED_PATH = "fine_tuned_models"
DATASET_ANNOTATIONS_PATH = "annotations"
DATASET_ERROR_LOGS_PATH = "error_logs"
DATASET_PREFERENCES_PATH = "preferences/user_preferences.json"

# Training artefact locations inside the same dataset repo.
DATASET_TRAINING_DATA_PATH = "training_data"
DATASET_TRAINING_LOGS_PATH = "training_logs"
# Temporary storage — everything lives under the system temp directory so
# restarts of the (ephemeral) host start from a clean slate.
TEMP_DIR = tempfile.gettempdir()
TEMP_ROOT = os.path.join(TEMP_DIR, "status_law_kb")
CHAT_HISTORY_PATH = os.path.join(TEMP_ROOT, "chat_history")
VECTOR_STORE_PATH = os.path.join(TEMP_ROOT, "vector_store")
FINE_TUNED_PATH = os.path.join(TEMP_ROOT, "fine_tuned_models")
MODELS_REGISTRY_PATH = os.path.join(TEMP_ROOT, "models_registry.json")

# Create the temporary directories up front (idempotent).
for path in [CHAT_HISTORY_PATH, VECTOR_STORE_PATH, FINE_TUNED_PATH]:
    os.makedirs(path, exist_ok=True)

# Paths configuration
# MODEL_PATH is <project root>/models (two levels up from this config file).
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")

# BUG FIX: the original wrote os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH)
# — joining two ABSOLUTE paths.  os.path.join discards everything before a
# second absolute component, so the result was silently just FINE_TUNED_PATH.
# State that intent directly instead of relying on the discard behaviour.
TRAINING_OUTPUT_DIR = FINE_TUNED_PATH

# Create the remaining directories if they don't exist.
os.makedirs(MODEL_PATH, exist_ok=True)
os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
# Models configuration with detailed information.
# Each entry: HF repo "id", display metadata, shared generation "parameters",
# LoRA "training" settings, and human-readable "details".
# NOTE(review): the "llama-7b" key actually maps to TinyLlama 1.1B — the key
# name is misleading but is kept because callers may reference it.
MODELS = {
    "zephyr-7b": {
        "id": "HuggingFaceH4/zephyr-7b-beta",
        "name": "Zephyr 7B",
        "description": "A state-of-the-art 7B parameter language model",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "HuggingFaceH4/zephyr-7b-beta",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "zephyr-7b-beta-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "HuggingFaceH4 Zephyr 7B Beta",
            "capabilities": [
                "High performance on instruction-following tasks",
                "Good response accuracy",
                "Advanced reasoning capabilities",
                "Excellent text generation quality"
            ],
            "limitations": [
                "May require paid API for usage",
                "Limited support for languages other than English",
                "Less optimization for legal topics compared to specialized models"
            ],
            "use_cases": [
                "Complex legal reasoning",
                "Case analysis",
                "Legal research",
                "Structured legal text generation"
            ],
            "documentation": "https://huggingface.co/HuggingFaceH4/zephyr-7b-beta"
        }
    },
    "llama-7b": {
        "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "name": "TinyLlama 1.1B Chat",
        "description": "Lightweight chat model with excellent performance for its size",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "tinyllama-1.1b-chat-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "TinyLlama 1.1B Chat v1.0",
            "capabilities": [
                "Efficient resource usage",
                "Fast inference speed",
                "Good for basic chat interactions",
                "Can run on CPU",
                "Works on devices with limited RAM",
                "Open source and free to use"
            ],
            "limitations": [
                "Lower capacity compared to larger models",
                "May struggle with complex reasoning",
                "Limited context window",
                "Less specialized knowledge"
            ],
            "use_cases": [
                "Quick legal queries",
                "Basic document analysis",
                "Resource-constrained environments",
                "Mobile applications",
                "Edge devices"
            ],
            "documentation": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        }
    },
    "mixtral-8x7b": {
        "id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "name": "Mixtral 8x7B Instruct",
        "description": "Powerful mixture-of-experts model with strong multilingual capabilities",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "mixtral-8x7b-instruct-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "Mixtral 8x7B Instruct v0.1",
            "capabilities": [
                "Excellent multilingual support",
                "Strong reasoning capabilities",
                "Superior instruction following",
                "High-quality text generation"
            ],
            "limitations": [
                "Larger model size",
                "May need domain-specific prompting",
                "Higher resource requirements"
            ],
            "use_cases": [
                "Complex legal analysis",
                "Multilingual consultation",
                "Advanced document processing",
                "Cross-lingual communication"
            ],
            "documentation": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1"
        }
    },
    "neural-mistral": {  # replaces the former phi-2 entry
        # NOTE(review): could not verify that "teknium/Neural-Mistral-7B-v0.1"
        # exists on the HF Hub — confirm the repo id before relying on it.
        "id": "teknium/Neural-Mistral-7B-v0.1",
        "name": "Neural Mistral 7B",
        "description": "Enhanced version of Mistral with improved reasoning and instruction following",
        "type": "base",
        "parameters": {
            "max_length": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
        },
        "training": {
            "base_model_path": "teknium/Neural-Mistral-7B-v0.1",
            "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "neural-mistral-7b-tuned"),
            "lora_config": {
                "r": 16,
                "lora_alpha": 32,
                "lora_dropout": 0.05,
                "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
            }
        },
        "details": {
            "full_name": "Neural Mistral 7B v0.1",
            "capabilities": [
                "Enhanced reasoning capabilities",
                "Improved instruction following",
                "Strong multilingual support",
                "Better context understanding",
                "Advanced problem-solving abilities",
                "Consistent output quality"
            ],
            "limitations": [
                "Requires more GPU memory",
                "May be slower than smaller models",
                "Resource intensive for fine-tuning"
            ],
            "use_cases": [
                "Complex legal analysis",
                "Advanced reasoning tasks",
                "Detailed document processing",
                "Professional consultation",
                "Research assistance"
            ],
            "documentation": "https://huggingface.co/teknium/Neural-Mistral-7B-v0.1"
        }
    }
}
# Every model is served through the same HF inference endpoint; stamp it onto
# each entry so callers can read it from the model config alone.
for model in MODELS.values():
    model["endpoint"] = API_CONFIG["inference_endpoint"]

# Default model selection — deliberately "zephyr-7b" rather than "llama-7b".
DEFAULT_MODEL = "zephyr-7b"
ACTIVE_MODEL = MODELS[DEFAULT_MODEL]
# Embedding model used when building the vector store.
EMBEDDING_MODEL = "intfloat/multilingual-e5-large"

# User-Agent header value for outgoing HTTP requests.
USER_AGENT = "Status-Law-Assistant/1.0"
# Rating dimensions for answer annotation.  Keys are the stored field names;
# values are Russian labels (presumably shown in the annotation UI — confirm
# against the front-end code).  Values are runtime strings: do not translate.
RATING_FIELDS = {
    "accuracy": "Точность ответа",                    # "Answer accuracy"
    "completeness": "Полнота информации",             # "Completeness of information"
    "relevance": "Релевантность ответу",              # "Relevance to the answer"
    "clarity": "Ясность изложения",                   # "Clarity of presentation"
    "legal_correctness": "Юридическая корректность"   # "Legal correctness"
}
# Shape of one persisted conversation.  Values are the *types* each field
# must have (a documentation-by-structure schema, not a validator).
CHAT_HISTORY_SCHEMA = {
    "conversation_id": str,
    "timestamp": str,  # ISO format
    "history": [       # one dict per turn, in chronological order
        {
            "role": str,      # "user" or "assistant"
            "content": str,
            "timestamp": str  # ISO format
        }
    ]
}
# Shape of one annotation record; same convention as CHAT_HISTORY_SCHEMA
# (values are expected types).  "ratings" has one int per RATING_FIELDS key.
ANNOTATION_SCHEMA = {
    "conversation_id": str,
    "timestamp": str,
    "question": str,
    "original_answer": str,
    "improved_answer": str,
    "ratings": {field: int for field in RATING_FIELDS},  # all ratings 1-5
    "notes": str
}