|
|
import re |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sympy import sympify, Eq |
|
|
from sympy.parsing.sympy_parser import parse_expr |
|
|
from sympy.core.sympify import SympifyError |
|
|
from concurrent.futures import ProcessPoolExecutor |
|
|
import multiprocessing as mp |
|
|
from sympy import simplify, sympify |
|
|
from sympy.core.sympify import SympifyError |
|
|
import swifter |
|
|
import random |
|
|
|
|
|
from joblib import Parallel, delayed |
|
|
|
|
|
|
|
|
from tqdm.auto import tqdm |
|
|
|
|
|
def apply_chunk(chunk, func):
    """Run ``func`` over one slice of data via the slice's ``.apply`` method.

    Kept as a plain module-level function so that it can be shipped to
    worker processes.

    Args:
        chunk: Object exposing ``.apply`` (for example a pandas Series).
        func: Callable forwarded unchanged to ``chunk.apply``.

    Returns:
        Whatever ``chunk.apply(func)`` produces.
    """
    applied = chunk.apply(func)
    return applied
|
|
|
|
|
def parallel_apply(series, func, n_jobs=None):
    """Apply ``func`` to ``series`` using a pool of worker processes.

    The input is cut into contiguous, near-equal positional slices. Each
    slice is sent to a ``multiprocessing.Pool`` worker that runs
    ``apply_chunk(slice, func)``, and the partial results are concatenated
    in their original order.

    Args:
        series: pandas object supporting ``len``, ``.iloc`` and ``.apply``.
        func: Callable handed to ``.apply``. It must be picklable whenever
            more than one worker process is actually used.
        n_jobs: Requested number of worker processes. ``None`` means
            ``mp.cpu_count()``.

    Returns:
        The concatenation of the per-slice ``.apply`` results.

    Raises:
        ValueError: If ``n_jobs`` is smaller than 1.
    """
    n_jobs = mp.cpu_count() if n_jobs is None else n_jobs
    if n_jobs < 1:
        raise ValueError(f"n_jobs must be a positive integer, got {n_jobs!r}")

    # Never use more workers than rows: the surplus workers would only receive
    # empty slices, which then have to be concatenated with the real results.
    n_workers = min(n_jobs, len(series))

    # With zero/one row or a single worker there is nothing to parallelise, so
    # skip the cost of spawning processes and pickling ``func``.
    if n_workers <= 1:
        return series.apply(func)

    # Split row *positions* rather than the pandas object itself, so NumPy only
    # ever handles a plain integer array and the slices keep their index.
    position_groups = np.array_split(np.arange(len(series)), n_workers)
    chunks = [series.iloc[group[0]:group[-1] + 1] for group in position_groups]

    with mp.Pool(n_workers) as pool:
        results = pool.starmap(apply_chunk, [(chunk, func) for chunk in chunks])

    return pd.concat(results)
|
|
|
|
|
def canonicalize_expr(expr, canonicalizer=simplify):
    """Canonicalize an expression and bundle it with a hash key.

    Args:
        expr: Expression passed to ``canonicalizer``.
        canonicalizer: Callable producing the canonical form; defaults to
            ``simplify``.

    Returns:
        A 3-tuple ``(hash_of_canonical, canonical, expr)`` where the last
        item is the untouched input.
    """
    canonical = canonicalizer(expr)
    key = hash(canonical)
    return key, canonical, expr
|
|
|
|
|
# One numeric literal that is not glued to an identifier or to another number:
# an optionally signed decimal ("0.5", ".5", "-2.5e3") or an unsigned integer
# ("3", "1e-5"). The look-arounds keep variable indices such as the "1" in
# "x_1" from being treated as constants.
_CONSTANT_RE = re.compile(
    r'(?<![\w.])(?:[-+]?\d*\.\d+(?:[eE][-+]?\d+)?|\d+(?:[eE][-+]?\d+)?)(?![\w.])'
)


def _constant_placeholder(match):
    """Return the replacement text for one literal matched by ``_CONSTANT_RE``.

    A leading sign is absorbed into ``'C'`` when it is unary, but preserved
    when the nearest non-blank character before it ends an operand, because
    in that case the sign is really a binary operator.
    """
    token = match.group(0)
    if token[0] in '+-':
        text = match.string
        pos = match.start() - 1
        while pos >= 0 and text[pos].isspace():
            pos -= 1
        # An identifier/number character or a closing bracket means the sign
        # joins two operands: dropping it would turn "(x)-0.5" into "(x)C".
        if pos >= 0 and (text[pos].isalnum() or text[pos] in '_.)]'):
            return token[0] + 'C'
    return 'C'


def replace_constants(equation):
    """Replace every standalone numeric constant in ``equation`` with ``'C'``.

    Integers, decimals and scientific notation are recognised. Digits that
    are part of an identifier (for example the index in ``x_1``) are left
    alone. A sign directly attached to a decimal literal is folded into the
    placeholder only when it is a unary sign.

    Args:
        equation: Expression text.

    Returns:
        The text with numeric constants replaced by ``'C'``.
    """
    return _CONSTANT_RE.sub(_constant_placeholder, equation)


def augment_expression(equation, var_prefix='x', max_index=10, p=0.5):
    """Mask numeric constants and randomly re-index variables.

    1. Replace all standalone numeric constants (including scientific
       notation) with ``'C'``.
    2. For each occurrence of a variable (e.g. ``x_1``), with probability
       ``p`` replace it by ``{var_prefix}_{k}`` where ``k`` is drawn uniformly
       from ``1..max_index`` (the draw may return the original index);
       otherwise leave the occurrence as is.

    Args:
        equation: Expression text.
        var_prefix: Variable name prefix that precedes ``_<index>``.
        max_index: Largest index that may be drawn (inclusive).
        p: Per-occurrence probability of re-drawing the index.

    Returns:
        The augmented expression text.
    """
    masked = replace_constants(equation)

    # Escape the prefix so that regex metacharacters in it are matched
    # literally instead of being interpreted as pattern syntax.
    var_pattern = rf'\b{re.escape(var_prefix)}_\d+\b'

    def _maybe_rename(match):
        if random.random() < p:
            new_idx = random.randint(1, max_index)
            return f"{var_prefix}_{new_idx}"
        return match.group(0)

    return re.sub(var_pattern, _maybe_rename, masked)
|
|
|
|
|
|
|
|
|
|
|
def is_valid_equation(equation_str):
    """Check whether a string is a mathematical expression SymPy can parse.

    Args:
        equation_str: Candidate expression. Anything that is not a ``str``
            (including ``None`` and NaN) is rejected.

    Returns:
        ``True`` if ``parse_expr`` accepts the stripped text; ``False`` for
        non-strings, blank strings, or text the parser rejects. A message is
        printed when parsing fails.
    """
    if not isinstance(equation_str, str):
        return False

    candidate = equation_str.strip()
    if not candidate:
        return False

    try:
        # NOTE(security): SymPy documents that parse_expr evaluates its input
        # with eval(); do not feed it text from untrusted sources.
        parse_expr(candidate)
    except Exception:
        # A validity check must answer False rather than crash: the parser
        # can fail with error types beyond SympifyError/SyntaxError (for
        # example tokenizer errors on unbalanced brackets).
        print(f"Erro ao analisar a equação: {equation_str}")
        return False
    return True
|
|
|
|
|
def canonical_form(expr_str):
    """Return the canonical (simplified, then expanded) form of an expression.

    Args:
        expr_str: Expression given as a string.

    Returns:
        The string form of ``simplify(expr_str).expand()``. If SymPy raises
        ``SympifyError``, an error message string containing the original
        input is returned instead of raising.
    """
    try:
        simplified = simplify(expr_str)
        expanded = simplified.expand()
        return str(expanded)
    except SympifyError:
        return f"Erro ao interpretar a expressão: {expr_str}"