Agent-Pruning-Lab / components /provence_wrapper.py
Danielfonseca1212's picture
Create components/provence_wrapper.py
80e393a verified
"""
Wrapper para Provence/OpenProvence - Pruning de Contexto
"""
import re
import numpy as np
class ProvencePruner:
    """Demo stand-in that mimics Provence-style context pruning.

    No model is loaded: relevance is a lexical heuristic (word overlap with
    the query plus a bonus for known technical terms) with optional Gaussian
    noise added so the scores look less uniform.

    Attributes:
        model_name: Label stored for the "model" in use; "demo" when the
            caller passes nothing (or any falsy value).
        noise_std: Standard deviation of the Gaussian noise added to each
            score. Use 0 for fully deterministic scores.
    """

    # Whitespace that directly follows sentence-ending punctuation.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')

    # Terms whose presence (as a substring) raises a sentence's score.
    _TECHNICAL_TERMS = frozenset({
        'pruning', 'context', 'rag', 'retrieval', 'agent',
        'token', 'llm', 'relevância', 'compressão', 'provence',
        'semantic', 'highlighting', 'documento', 'query',
    })

    _DEFAULT_NOISE_STD = 0.05

    # Class-level fallback so instances built without __init__ still score.
    noise_std = _DEFAULT_NOISE_STD

    def __init__(self, model_name=None, noise_std=_DEFAULT_NOISE_STD):
        """Store the model label and the scoring-noise level.

        Args:
            model_name: Optional label; falls back to "demo" when falsy.
            noise_std: Non-negative standard deviation of the noise added
                to every relevance score (default 0.05).
        """
        self.model_name = model_name or "demo"
        self.noise_std = noise_std

    def _split_sentences(self, text):
        """Split *text* into stripped, non-empty sentences.

        The split happens on whitespace that follows '.', '!' or '?'.
        ``None`` or an empty string yields an empty list.
        """
        if not text:
            return []
        pieces = self._SENTENCE_BOUNDARY.split(text.strip())
        return [piece.strip() for piece in pieces if piece.strip()]

    def _calculate_relevance(self, query, sentence):
        """Return a heuristic relevance score in [0.0, 1.0], rounded to 3 places.

        The score combines the number of whitespace-delimited words shared by
        *query* and *sentence* (case-insensitive) with the number of technical
        terms found as substrings of the sentence, then adds Gaussian noise
        with standard deviation ``self.noise_std``.
        """
        lowered = sentence.lower()
        overlap = len(set(query.lower().split()) & set(lowered.split()))
        term_bonus = sum(term in lowered for term in self._TECHNICAL_TERMS)

        # Cap the deterministic part at 1.0 before adding noise.
        base = min(1.0, (overlap * 0.15) + (term_bonus * 0.12) + 0.1)
        noisy = base + np.random.normal(0, self.noise_std)

        # Clip again because the noise can push the value outside [0, 1];
        # convert to a built-in float so the result is JSON-serializable.
        return float(round(np.clip(noisy, 0.0, 1.0), 3))

    def prune(self, query, document, threshold=0.5):
        """Score every sentence of *document* against *query* and filter.

        Args:
            query: Query text the sentences are compared with.
            document: Text to split into sentences and prune.
            threshold: Minimum score (inclusive) for a sentence to be kept.

        Returns:
            dict with keys:
                'sentences': all sentences found in the document,
                'scores': one float score per sentence,
                'kept': one bool per sentence (score >= threshold),
                'pruned_sentences': the sentences that were kept,
                'compression_rate': percentage of sentences removed,
                    rounded to one decimal; 0.0 for an empty document.
        """
        sentences = self._split_sentences(document)
        scores = [self._calculate_relevance(query, sent) for sent in sentences]
        kept = [bool(score >= threshold) for score in scores]

        if sentences:
            compression_rate = round((1 - sum(kept) / len(sentences)) * 100, 1)
        else:
            # Nothing to remove; avoids dividing by zero on empty input.
            compression_rate = 0.0

        return {
            'sentences': sentences,
            'scores': scores,
            'kept': kept,
            'pruned_sentences': [s for s, k in zip(sentences, kept) if k],
            'compression_rate': compression_rate,
        }