Spaces:
Sleeping
Sleeping
Comprehensive Refinement Cycle v2.6: Security, Performance, and Training Quality
Browse files
- .gitignore +5 -0
- execution/fastapi_server.py +34 -8
- execution/inference_wav2vec.py +30 -14
- execution/metadata_extractor.py +33 -11
- execution/train_wav2vec.py +13 -3
.gitignore
CHANGED
|
@@ -31,3 +31,8 @@ local_finetuned_model/
|
|
| 31 |
# OS
|
| 32 |
.DS_Store
|
| 33 |
Thumbs.db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# OS
|
| 32 |
.DS_Store
|
| 33 |
Thumbs.db
|
| 34 |
+
|
| 35 |
+
# Frontend
|
| 36 |
+
node_modules/
|
| 37 |
+
dist/
|
| 38 |
+
.next/
|
execution/fastapi_server.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
import os
|
| 2 |
import shutil
|
|
|
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
-
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Depends, Header, status
|
| 5 |
from fastapi.staticfiles import StaticFiles
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
import zipfile
|
| 9 |
import rarfile
|
|
@@ -17,16 +20,39 @@ load_dotenv()
|
|
| 17 |
from execution.feature_extractor import extract_features
|
| 18 |
from execution.ensemble_manager import get_combined_verdict
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
app.add_middleware(
|
| 24 |
CORSMiddleware,
|
| 25 |
-
allow_origins=
|
|
|
|
| 26 |
allow_methods=["*"],
|
| 27 |
allow_headers=["*"],
|
| 28 |
)
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# Estado global do treinamento (simplificado para MVP)
|
| 31 |
training_status = {
|
| 32 |
"status": "idle", # idle, processing, training, completed, failed
|
|
@@ -35,14 +61,13 @@ training_status = {
|
|
| 35 |
"error": None
|
| 36 |
}
|
| 37 |
|
| 38 |
-
# Verificador de token
|
| 39 |
def verify_admin_token(authorization: str = Header(None)):
|
| 40 |
if not authorization or not authorization.startswith("Bearer "):
|
| 41 |
raise HTTPException(status_code=401, detail="Token ausente ou inválido")
|
| 42 |
|
| 43 |
token = authorization.split(" ")[1]
|
| 44 |
-
|
| 45 |
-
if token != "confereai_admin_token_2026":
|
| 46 |
raise HTTPException(status_code=401, detail="Token inválido")
|
| 47 |
return token
|
| 48 |
|
|
@@ -118,7 +143,8 @@ class LoginRequest(BaseModel):
|
|
| 118 |
async def admin_login(req: LoginRequest):
|
| 119 |
admin_pw = os.environ.get("ADMIN_PASSWORD", "Casa102030@")
|
| 120 |
if req.password == admin_pw:
|
| 121 |
-
|
|
|
|
| 122 |
raise HTTPException(status_code=401, detail="Senha incorreta")
|
| 123 |
|
| 124 |
@app.post("/admin/upload_dataset")
|
|
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
+
import sys
|
| 4 |
+
import json
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
+
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Depends, Header, status, Request, Query
|
| 7 |
from fastapi.staticfiles import StaticFiles
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
+
from fastapi.responses import JSONResponse
|
| 10 |
from pydantic import BaseModel
|
| 11 |
import zipfile
|
| 12 |
import rarfile
|
|
|
|
| 20 |
from execution.feature_extractor import extract_features
|
| 21 |
from execution.ensemble_manager import get_combined_verdict
|
| 22 |
|
| 23 |
+
# Configurações de Segurança e Limites
|
| 24 |
+
ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN", "confereai_admin_token_2026")
|
| 25 |
+
UPLOAD_MAX_SIZE = 10 * 1024 * 1024 # 10MB para análises comuns
|
| 26 |
+
ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "*").split(",")
|
| 27 |
|
| 28 |
+
app = FastAPI(title="ConfereAI Audio Fraud Detection API", version="2.6")
|
| 29 |
+
|
| 30 |
+
# Configuração de CORS Dinâmica
|
| 31 |
app.add_middleware(
|
| 32 |
CORSMiddleware,
|
| 33 |
+
allow_origins=ALLOWED_ORIGINS,
|
| 34 |
+
allow_credentials=True,
|
| 35 |
allow_methods=["*"],
|
| 36 |
allow_headers=["*"],
|
| 37 |
)
|
| 38 |
|
| 39 |
+
# --- MIDDLEWARE DE TAMANHO DE UPLOAD ---
|
| 40 |
+
from fastapi import Request
|
| 41 |
+
from fastapi.responses import JSONResponse
|
| 42 |
+
|
| 43 |
+
@app.middleware("http")
|
| 44 |
+
async def limit_upload_size(request: Request, call_next):
|
| 45 |
+
# O limite de 10MB não se aplica às rotas de admin (datasets são maiores)
|
| 46 |
+
if request.method == "POST" and not request.url.path.startswith("/admin"):
|
| 47 |
+
if "content-length" in request.headers:
|
| 48 |
+
if int(request.headers["content-length"]) > UPLOAD_MAX_SIZE:
|
| 49 |
+
return JSONResponse(
|
| 50 |
+
status_code=413,
|
| 51 |
+
content={"error": "Arquivo muito grande para análise. Limite de 10MB."}
|
| 52 |
+
)
|
| 53 |
+
return await call_next(request)
|
| 54 |
+
# ---------------------------------------
|
| 55 |
+
|
| 56 |
# Estado global do treinamento (simplificado para MVP)
|
| 57 |
training_status = {
|
| 58 |
"status": "idle", # idle, processing, training, completed, failed
|
|
|
|
| 61 |
"error": None
|
| 62 |
}
|
| 63 |
|
| 64 |
+
# Verificador de token usando variável de ambiente
|
| 65 |
def verify_admin_token(authorization: str = Header(None)):
|
| 66 |
if not authorization or not authorization.startswith("Bearer "):
|
| 67 |
raise HTTPException(status_code=401, detail="Token ausente ou inválido")
|
| 68 |
|
| 69 |
token = authorization.split(" ")[1]
|
| 70 |
+
if token != ADMIN_TOKEN:
|
|
|
|
| 71 |
raise HTTPException(status_code=401, detail="Token inválido")
|
| 72 |
return token
|
| 73 |
|
|
|
|
| 143 |
async def admin_login(req: LoginRequest):
|
| 144 |
admin_pw = os.environ.get("ADMIN_PASSWORD", "Casa102030@")
|
| 145 |
if req.password == admin_pw:
|
| 146 |
+
# Correção Crítica: Retornar o token real configurado e não uma string fixa
|
| 147 |
+
return {"token": ADMIN_TOKEN}
|
| 148 |
raise HTTPException(status_code=401, detail="Senha incorreta")
|
| 149 |
|
| 150 |
@app.post("/admin/upload_dataset")
|
execution/inference_wav2vec.py
CHANGED
|
@@ -12,6 +12,33 @@ LOCAL_MODEL_DIR = "./local_finetuned_model"
|
|
| 12 |
CUSTOM_MODEL_REPO = os.environ.get("CUSTOM_MODEL_REPO", "TEDDyx86/confereai-wav2vec2")
|
| 13 |
BASE_MODEL = "HyperMoon/wav2vec2-base-960h-finetuned-deepfake"
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def run_inference(audio_path, fallback_model_name=None):
|
| 16 |
"""
|
| 17 |
Realiza inferência real priorizando o modelo fine-tuned.
|
|
@@ -27,20 +54,9 @@ def run_inference(audio_path, fallback_model_name=None):
|
|
| 27 |
print(f"Rodando inferência REAL [{model_name}] em: {audio_path}", file=sys.stderr)
|
| 28 |
|
| 29 |
try:
|
| 30 |
-
# 1. Carrega extrator de características e modelo
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
model = AutoModelForAudioClassification.from_pretrained(model_path)
|
| 34 |
-
|
| 35 |
-
# --- OTIMIZAÇÃO: Quantização Dinâmica para CPU ---
|
| 36 |
-
# Reduz o tamanho do modelo e acelera inferência em CPU (Hugging Face)
|
| 37 |
-
if not torch.cuda.is_available():
|
| 38 |
-
print("Aplicando Quantização Dinâmica (CPU Optimization)...", file=sys.stderr)
|
| 39 |
-
model = torch.quantization.quantize_dynamic(
|
| 40 |
-
model, {torch.nn.Linear}, dtype=torch.qint8
|
| 41 |
-
)
|
| 42 |
-
|
| 43 |
-
model.eval()
|
| 44 |
|
| 45 |
# 2. Carrega e pré-processa o áudio
|
| 46 |
print(f"Lendo áudio: {audio_path}", file=sys.stderr)
|
|
|
|
| 12 |
CUSTOM_MODEL_REPO = os.environ.get("CUSTOM_MODEL_REPO", "TEDDyx86/confereai-wav2vec2")
|
| 13 |
BASE_MODEL = "HyperMoon/wav2vec2-base-960h-finetuned-deepfake"
|
| 14 |
|
| 15 |
+
# Singleton para carregar o modelo e processador apenas uma vez
|
| 16 |
+
_feature_extractor = None
|
| 17 |
+
_model = None
|
| 18 |
+
_last_model_path = None
|
| 19 |
+
|
| 20 |
+
def get_wav2vec_resources(model_path):
|
| 21 |
+
global _feature_extractor, _model, _last_model_path
|
| 22 |
+
|
| 23 |
+
# Invalidação de Cache: Se o path mudou, precisamos recarregar o modelo
|
| 24 |
+
if _feature_extractor is None or _model is None or _last_model_path != model_path:
|
| 25 |
+
print(f"Carregando motor Wav2Vec2 (HyperMoon): {model_path}...", file=sys.stderr)
|
| 26 |
+
_feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
|
| 27 |
+
model = AutoModelForAudioClassification.from_pretrained(model_path)
|
| 28 |
+
|
| 29 |
+
# --- OTIMIZAÇÃO: Quantização Dinâmica para CPU ---
|
| 30 |
+
if not torch.cuda.is_available():
|
| 31 |
+
print("Aplicando Quantização Dinâmica (CPU Optimization)...", file=sys.stderr)
|
| 32 |
+
model = torch.quantization.quantize_dynamic(
|
| 33 |
+
model, {torch.nn.Linear}, dtype=torch.qint8
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
_model = model
|
| 37 |
+
_model.eval()
|
| 38 |
+
_last_model_path = model_path
|
| 39 |
+
|
| 40 |
+
return _feature_extractor, _model
|
| 41 |
+
|
| 42 |
def run_inference(audio_path, fallback_model_name=None):
|
| 43 |
"""
|
| 44 |
Realiza inferência real priorizando o modelo fine-tuned.
|
|
|
|
| 54 |
print(f"Rodando inferência REAL [{model_name}] em: {audio_path}", file=sys.stderr)
|
| 55 |
|
| 56 |
try:
|
| 57 |
+
# 1. Carrega extrator de características e modelo (Singleton)
|
| 58 |
+
feature_extractor, model = get_wav2vec_resources(model_path)
|
| 59 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# 2. Carrega e pré-processa o áudio
|
| 62 |
print(f"Lendo áudio: {audio_path}", file=sys.stderr)
|
execution/metadata_extractor.py
CHANGED
|
@@ -1,20 +1,42 @@
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import json
|
|
|
|
| 3 |
|
| 4 |
def extract_metadata(file_path):
|
| 5 |
"""
|
| 6 |
-
Extrai metadados
|
| 7 |
"""
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
if __name__ == "__main__":
|
| 20 |
if len(sys.argv) < 2:
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import os
|
| 3 |
import sys
|
| 4 |
import json
|
| 5 |
+
from datetime import datetime
|
| 6 |
|
| 7 |
def extract_metadata(file_path):
|
| 8 |
"""
|
| 9 |
+
Extrai metadados reais de um arquivo de áudio usando librosa.
|
| 10 |
"""
|
| 11 |
+
try:
|
| 12 |
+
# Carrega apenas os metadados (duration) sem ler todo o áudio se possível
|
| 13 |
+
# librosa.get_duration é eficiente
|
| 14 |
+
duration = librosa.get_duration(path=file_path)
|
| 15 |
+
|
| 16 |
+
# Para taxa de amostragem e canais, carregamos um pequeno trecho
|
| 17 |
+
y, sr = librosa.load(file_path, sr=None, duration=0.1)
|
| 18 |
+
channels = 1 if len(y.shape) == 1 else y.shape[0]
|
| 19 |
+
|
| 20 |
+
# Informações do arquivo
|
| 21 |
+
file_stats = os.stat(file_path)
|
| 22 |
+
creation_time = datetime.fromtimestamp(file_stats.st_ctime).strftime('%Y-%m-%d %H:%M:%S')
|
| 23 |
+
file_format = os.path.splitext(file_path)[1].replace('.', '').upper()
|
| 24 |
+
|
| 25 |
+
metadata = {
|
| 26 |
+
"format": file_format,
|
| 27 |
+
"sample_rate": sr,
|
| 28 |
+
"channels": channels,
|
| 29 |
+
"duration_seconds": round(duration, 2),
|
| 30 |
+
"encoder": "Librosa Forensic Parser",
|
| 31 |
+
"creation_time": creation_time
|
| 32 |
+
}
|
| 33 |
+
return metadata
|
| 34 |
+
except Exception as e:
|
| 35 |
+
print(f"Erro ao extrair metadados: {e}")
|
| 36 |
+
return {
|
| 37 |
+
"format": "Unknown",
|
| 38 |
+
"error": str(e)
|
| 39 |
+
}
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
| 42 |
if len(sys.argv) < 2:
|
execution/train_wav2vec.py
CHANGED
|
@@ -72,9 +72,19 @@ def start_finetuning(dataset_dir: str):
|
|
| 72 |
"""
|
| 73 |
processor = get_processor()
|
| 74 |
|
| 75 |
-
# Prepara os datasets
|
| 76 |
-
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
if len(train_dataset) == 0:
|
| 79 |
raise ValueError("Nenhum áudio encontrado no dataset.")
|
| 80 |
|
|
@@ -119,7 +129,7 @@ def start_finetuning(dataset_dir: str):
|
|
| 119 |
model=model,
|
| 120 |
args=training_args,
|
| 121 |
train_dataset=train_dataset,
|
| 122 |
-
eval_dataset=
|
| 123 |
)
|
| 124 |
|
| 125 |
trainer.train()
|
|
|
|
| 72 |
"""
|
| 73 |
processor = get_processor()
|
| 74 |
|
| 75 |
+
# Prepara os datasets com split de 80/20 para avaliação real
|
| 76 |
+
full_dataset = DeepfakeAudioDataset(dataset_dir, processor)
|
| 77 |
|
| 78 |
+
if len(full_dataset) < 10:
|
| 79 |
+
print("⚠️ Dataset muito pequeno. Usando todo o conjunto para treino e eval.")
|
| 80 |
+
train_dataset = full_dataset
|
| 81 |
+
eval_dataset = full_dataset
|
| 82 |
+
else:
|
| 83 |
+
train_size = int(0.8 * len(full_dataset))
|
| 84 |
+
eval_size = len(full_dataset) - train_size
|
| 85 |
+
train_dataset, eval_dataset = torch.utils.data.random_split(full_dataset, [train_size, eval_size])
|
| 86 |
+
print(f"📊 Dataset dividido: {train_size} para treino, {eval_size} para avaliação.")
|
| 87 |
+
|
| 88 |
if len(train_dataset) == 0:
|
| 89 |
raise ValueError("Nenhum áudio encontrado no dataset.")
|
| 90 |
|
|
|
|
| 129 |
model=model,
|
| 130 |
args=training_args,
|
| 131 |
train_dataset=train_dataset,
|
| 132 |
+
eval_dataset=eval_dataset, # Agora usando o split real de 20%
|
| 133 |
)
|
| 134 |
|
| 135 |
trainer.train()
|