Noise Sensitivity on

Equivalence Relations

Omar Elshinawy

A Thesis submitted in partial fulfillment
of the requirements for the degree of

Bachelor of Science in

Mathematics

May 20, 2025


Abstract

Noise sensitivity was first introduced by Benjamini, Kalai and Schramm in their seminal work on Boolean functions. We propose a construction of a similar flavour for binary relations. By flipping each relation independently with a small probability p, a natural question about recoverability arises, to which we give a positive answer in the case of an equivalence relation (X, ∼). We prove that equivalence relations are noise-stable under the prescribed model. In particular, we propose a simple reconstruction algorithm, and show that it achieves an asymptotically zero misclassification error.

Through this notebook, we warmly invite the reader to breathe the living mathematics of this thesis.


Constructor University  |  School of Computer Science and Engineering

Supervised by Prof. Dr. Keivan Mallahi-Karai


Preamble

The preamble carries all the functions and libraries being used. One may consult

> noise_sensitivity.py

to access the library source code. Alternatively, a static reference to the same functions is given at the end of this document.

In [1]:
from noise_sensitivity import *

Statement

For this discussion, let $(X, \sim)$ be an equivalence relation on the elements $1, 2, \dots, n.$ We write $X = \bigsqcup_{i=1}^k X_i,$ where $n_i = |X_i|$ denotes the size of the $i$-th equivalence class, so that $\sum_{i=1}^k n_i = n.$ The only restriction is $n_i \geq \epsilon n,$ a lower bound that rules out sparse classes.

In [2]:
n = 25; p=1/n; epsilon=0.3
In [3]:
X = equivalence(n, epsilon)
graph(X,"Graph of $(X, \sim)$", equivalence=True)
print(f"n = {[len(X_i) for X_i in X]} are the sizes of each equivalence class.\n")
[Figure: Graph of $(X, \sim)$]
N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.

n = [8, 8, 9] are the sizes of each equivalence class.


As observed above, it is indeed natural to model an equivalence relation as a graph. We shall borrow some graph-theoretic constructs as we move on; nevertheless, we continue to work within the equivalence relation framework.

$\textbf{Definition}~\text{(Noisy Relation.)}$ For each pair $i \neq j,$ let $X_{ij}=X_{ji}$ be an independent Bernoulli random variable with parameter $p.$ Set the condition

$$ i \sim' j \iff \begin{cases} i \sim j ~\land ~ X_{ij}=0\\ i \not \sim j ~\land~ X_{ij} = 1 \end{cases} $$

Then, $\sim'$ is said to be a $\textit{noisy}$ relation.

In words, $\sim'$ flips the relation between $i$ and $j$ with probability $p.$ The relation $\sim'$ is symmetric by definition. Further, it inherits reflexivity from $\sim,$ since there is no noise when $i=j$. Under noise, transitivity (in its full extent) is in general lost. We conjecture nonetheless that the transitive property is highly resilient to noise, and that we may thereby recover the original relation $\sim.$
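For concreteness, the following sketch transcribes the definition directly on a 0/1 adjacency matrix. The name noisy_from_definition is illustrative; the library's own noisy function, used in the example below, implements the same model.

def noisy_from_definition(A, p, rng=None):
    """Flip the relation on each pair i < j with probability p; the diagonal (reflexivity) is never touched."""
    rng = np.random.default_rng(rng)
    A_noisy = A.copy()
    n = A.shape[0]
    for i in range(n):
        for j in range(i + 1, n):
            if rng.random() < p:                   # X_ij = 1
                A_noisy[i, j] = 1 - A_noisy[i, j]  # i ~' j exactly when i was not related to j
                A_noisy[j, i] = A_noisy[i, j]      # keep the relation symmetric
    return A_noisy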


$\textbf{Example }\text{(Noisy Relation.)}$

In [4]:
X_ = noisy(X, p)
graph(X_, f"Noisy Equivalence Relation $(X, \sim')$ with $p={p}$", equivalence=False)
[Figure: Noisy Equivalence Relation $(X, \sim')$ with $p=0.04$]
N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.

The structure is indeed perturbed, yet its main features survive. The central question of our discussion is: how does one formalise this intuition about what each equivalence class should be?

Motivation

Given a perturbed adjacency matrix $\tilde A,$ the goal is to restore it to its original state. This requires a specific language, which we prescribe below.

$\textbf{Definition}~\text{(Score.)}$ For $i, j \in X,$ define

$$ s(i, j) = \sum_{w \in X\backslash\{i, j\}} \mathbf 1[i \sim w] \cdot \mathbf 1 [w\sim j] $$

to be the number of witnesses $w$ related to both $i$ and $j$; in graph terms, the number of common neighbours of $i$ and $j.$

$\textit{Remark.}$ This measure exploits transitivity to its full extent: every $w$ related to both $i$ and $j$ is evidence that, under transitivity, $i$ and $j$ should be related.

$\textbf{Definition}~\text{(Score Matrix.)}$ Define

$$ S_X := \begin{pmatrix} s(1, 1) & s(1, 2) & \dots & s(1, n)\\ s(2, 1)& s(2, 2) & \dots & s(2, n)\\ \vdots & \dots & \ddots & \vdots\\ s(n, 1) & \dots & s(n, n-1) & s(n, n) \end{pmatrix} $$

to store the scores $s(i, j)$ for every $i, j\in X.$

$\textbf{Example}~\text{(Equivalence Relation.)}$ In this case, it should be clear that $s(x, x) =\deg{x}.$ This is a useful consequence of our definition. For $x, i \in X_i$ with $x \neq i,$ and $j$ in a different class $X_j,$ one also obtains $s(x, i) = n_i-2$ and $s(x,j) = 0.$
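To make these formulas concrete, here is a minimal sketch that counts witnesses directly from the definition and verifies the claims above on a toy relation with classes $\{1,2,3\}$ and $\{4\}$; the names score_naive and A_demo are illustrative and not part of noise_sensitivity.py.

def score_naive(A, i, j):
    """Direct transcription of the definition: count witnesses w != i, j with i ~ w and w ~ j."""
    n = A.shape[0]
    return sum(A[i, w] * A[w, j] for w in range(n) if w != i and w != j)

# Toy relation with classes {1, 2, 3} and {4} (0-based indices in code).
A_demo = np.array([[1, 1, 1, 0],
                   [1, 1, 1, 0],
                   [1, 1, 1, 0],
                   [0, 0, 0, 1]])

assert score_naive(A_demo, 0, 0) == 2  # s(x, x) = deg x     (class of size 3, minus x itself)
assert score_naive(A_demo, 0, 1) == 1  # s(x, i) = n_i - 2   (the only witness is element 3)
assert score_naive(A_demo, 0, 3) == 0  # s(x, j) = 0 across classes

The library's score function computes the same quantities for all pairs at once via matrix multiplication.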

In [5]:
S_X = score(X, equivalence=True)
matrix(S_X, title="Equivalence Relation Score Matrix", width=3)
Equivalence Relation Score Matrix
  8  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  7  7  0  0  7  7  0  7  
  0  7  6  0  0  0  0  0  6  6  6  6  0  6  0  0  0  0  0  0  6  0  0  0  0  
  0  6  7  0  0  0  0  0  6  6  6  6  0  6  0  0  0  0  0  0  6  0  0  0  0  
  0  0  0  7  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  6  0  
  0  0  0  6  7  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  6  0  
  0  0  0  6  6  7  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  6  0  
  0  0  0  6  6  6  7  6  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  6  0  
  0  0  0  6  6  6  6  7  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  6  0  
  0  6  6  0  0  0  0  0  7  6  6  6  0  6  0  0  0  0  0  0  6  0  0  0  0  
  0  6  6  0  0  0  0  0  6  7  6  6  0  6  0  0  0  0  0  0  6  0  0  0  0  
  0  6  6  0  0  0  0  0  6  6  7  6  0  6  0  0  0  0  0  0  6  0  0  0  0  
  0  6  6  0  0  0  0  0  6  6  6  7  0  6  0  0  0  0  0  0  6  0  0  0  0  
  7  0  0  0  0  0  0  0  0  0  0  0  8  0  0  7  7  7  7  0  0  7  7  0  7  
  0  6  6  0  0  0  0  0  6  6  6  6  0  7  0  0  0  0  0  0  6  0  0  0  0  
  0  0  0  6  6  6  6  6  0  0  0  0  0  0  7  0  0  0  0  6  0  0  0  6  0  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  8  7  7  7  0  0  7  7  0  7  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  8  7  7  0  0  7  7  0  7  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  8  7  0  0  7  7  0  7  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  7  8  0  0  7  7  0  7  
  0  0  0  6  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  7  0  0  0  6  0  
  0  6  6  0  0  0  0  0  6  6  6  6  0  6  0  0  0  0  0  0  7  0  0  0  0  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  7  7  0  0  8  7  0  7  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  7  7  0  0  7  8  0  7  
  0  0  0  6  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  0  0  7  0  
  7  0  0  0  0  0  0  0  0  0  0  0  7  0  0  7  7  7  7  0  0  7  7  0  8  

$\textbf{Example}~\text{(Noisy Relation.)}$

In [6]:
S_X_ = score(X_)
matrix(S_X_, title="Noisy Score Matrix", width=3)
Noisy Score Matrix
 10  3  3  0  0  0  0  1  1  3  2  1  7  2  1  7  7  7  7  0  2  7  7  0  7  
  3  7  5  0  0  0  0  0  5  7  5  5  1  5  0  1  2  1  0  0  5  1  1  0  1  
  3  5  8  0  0  0  0  0  6  5  6  6  1  6  0  1  0  1  3  0  6  1  1  0  1  
  0  0  0  7  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  1  1  6  0  
  0  0  0  6  7  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  1  1  6  0  
  0  0  0  6  6  7  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  1  1  6  0  
  0  0  0  6  6  6  7  6  0  0  0  0  0  0  6  0  0  0  0  6  0  1  1  6  0  
  1  0  0  6  6  6  6  8  0  0  0  0  1  0  6  1  1  1  1  6  0  0  2  6  1  
  1  5  6  0  0  0  0  0  8  5  6  7  1  6  0  1  2  1  3  0  6  1  1  0  1  
  3  7  5  0  0  0  0  0  5  7  5  5  1  5  0  1  2  1  0  0  5  1  1  0  1  
  2  5  6  0  0  0  0  0  6  5  7  6  0  6  0  0  1  0  2  0  6  0  0  0  0  
  1  5  6  0  0  0  0  0  7  5  6  8  1  6  0  1  2  1  3  0  6  1  1  0  1  
  7  1  1  0  0  0  0  1  1  1  0  1  8  0  1  7  7  7  7  0  0  7  7  0  7  
  2  5  6  0  0  0  0  0  6  5  6  6  0  7  0  0  1  0  2  0  6  0  0  0  0  
  1  0  0  6  6  6  6  6  0  0  0  0  1  0  8  1  1  1  1  6  0  2  0  6  1  
  7  1  1  0  0  0  0  1  1  1  0  1  7  0  1  8  7  7  7  0  0  7  7  0  7  
  7  2  0  0  0  0  0  1  2  2  1  2  7  1  1  7  9  7  7  0  1  7  7  0  7  
  7  1  1  0  0  0  0  1  1  1  0  1  7  0  1  7  7  8  7  0  0  7  7  0  7  
  7  0  3  0  0  0  0  1  3  0  2  3  7  2  1  7  7  7 10  0  2  7  7  0  7  
  0  0  0  6  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  7  0  1  1  6  0  
  2  5  6  0  0  0  0  0  6  5  6  6  0  6  0  0  1  0  2  0  7  0  0  0  0  
  7  1  1  1  1  1  1  0  1  1  0  1  7  0  2  7  7  7  7  1  0  9  7  1  7  
  7  1  1  1  1  1  1  2  1  1  0  1  7  0  0  7  7  7  7  1  0  7  9  1  7  
  0  0  0  6  6  6  6  6  0  0  0  0  0  0  6  0  0  0  0  6  0  1  1  7  0  
  7  1  1  0  0  0  0  1  1  1  0  1  7  0  1  7  7  7  7  0  0  7  7  0  8  

Let us order the scores of each vertex with respect to other vertices.

In [7]:
s = np.sort(S_X_, axis=1)[:, ::-1]
matrix(M1=s, title="Noisy Sorted Score Matrix\n", width=3)
Noisy Sorted Score Matrix

 10  7  7  7  7  7  7  7  7  3  3  3  2  2  2  1  1  1  1  0  0  0  0  0  0  
  7  7  5  5  5  5  5  5  3  2  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  8  6  6  6  6  6  5  5  3  3  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  8  6  6  6  6  6  6  6  2  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  8  7  6  6  6  6  5  5  3  2  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  
  7  7  5  5  5  5  5  5  3  2  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  5  5  2  2  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  8  7  6  6  6  6  5  5  3  2  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  
  8  7  7  7  7  7  7  7  7  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  5  5  2  2  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  8  6  6  6  6  6  6  6  2  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  8  7  7  7  7  7  7  7  7  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
  9  7  7  7  7  7  7  7  7  2  2  2  2  1  1  1  1  1  0  0  0  0  0  0  0  
  8  7  7  7  7  7  7  7  7  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  
 10  7  7  7  7  7  7  7  7  3  3  3  2  2  2  1  1  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  7  6  6  6  6  6  5  5  2  2  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  9  7  7  7  7  7  7  7  7  2  1  1  1  1  1  1  1  1  1  1  1  0  0  0  0  
  9  7  7  7  7  7  7  7  7  2  1  1  1  1  1  1  1  1  1  1  1  0  0  0  0  
  7  6  6  6  6  6  6  6  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  
  8  7  7  7  7  7  7  7  7  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  

The score gaps are $\textit{mostly}$ visible!

Strategy

With the goal of determining the thresholds, let us first compute the minimum class size, that is, the smallest integer greater than or equal to $\epsilon n.$

In [8]:
n_min = int(np.ceil(epsilon * n)) - 1 # minus 1 uses 0-based indexing
print(f"The minimum class size is {n_min+1}.")
The minimum class size is 8.

We initialise a threshold array. This will store the score thresholds for each row of the score matrix.

In [9]:
tau = np.zeros(n)

Fixing $x \in X$, let us see how one might correctly sieve its scores.

In [10]:
x = 0
In [11]:
print(f"Scores of {x+1} : {s[x, :]}") # 0-based indexing
Scores of 1 : [10.  7.  7.  7.  7.  7.  7.  7.  7.  3.  3.  3.  2.  2.  2.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.]

We know that the class to which $x$ belongs is of size at least $\epsilon n.$ One may therefore safely assume that the elements attaining the largest $\lceil \epsilon n \rceil$ scores are members of the same class. This reduces the problem to that of considering

In [12]:
print(s[x, n_min:]) 
[7. 7. 3. 3. 3. 2. 2. 2. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]

Suppose that we make the cut at the $k$-th biggest score. Then $n_i = k$ by assumption, and the $(k+1)$-st biggest score belongs to a non-member. The observed score gap $\hat \xi$ is therefore the difference between these two scores.

In [13]:
k = n_min + 2; gap = s[x, k]-s[x, k+1]
print(f"A cut at k = {k+1} gives the gap {gap}")
A cut at k = 10 gives the gap 0.0

As discussed in $\textbf{Lemma 3.1},$ the theoretical gap $\xi$ depends on knowing $n_j,$ which is unavailable in practice. Instead, we bound $\xi(n, n_i, n_j, p)$ from above via $\xi_+(n, n_i, \epsilon, p)$.

$$\begin{equation*} \xi_+ = (n_i-2)-(3n_i+\epsilon n-6)\cdot p + (2n-4)\cdot p^2 \tag{$\textbf{3.1}$} \end{equation*}$$

$\textit{Remark.}$ We can do this, as $\xi$ is comparable to $\xi_+$ in the limit.
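As a quick sanity check, one may evaluate $(\textbf{3.1})$ by hand for the cut considered below and compare it with the library's xi; its signature xi(n, n_i, epsilon, p) is assumed here from its usage in the next cell.

# Illustrative check: evaluate formula (3.1) directly for n_i = 10.
n_i = 10
xi_plus = (n_i - 2) - (3*n_i + epsilon*n - 6)*p + (2*n - 4)*p**2
print(xi_plus, xi(n, n_i, epsilon, p))  # both ≈ 6.8136, the value used below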

In [14]:
mean = xi(n, k+1, epsilon, p)  # 1-based indexing, hence k+1
t = mean - gap
print(f"Theoretical vs Observed Gap: t = {mean} - {gap} = {t}")
Theoretical vs Observed Gap: t = 6.8136 - 0.0 = 6.8136

This gives us the deviation from $\xi_+.$ Now, if $\hat \xi$ has deviated from $\xi_+$ by $t,$ and $\xi_+$ is greater than $\xi,$ then $\hat \xi$ has deviated from $\xi$ by at least $t.$ From there we may apply the Bernstein concentration bound,

$$ \begin{equation*} \mathbb P[\xi - \hat \xi \geq t] \leq \exp\left(-\dfrac{t^2}{2\sigma^2 + \tfrac{2}{3} t}\right). \end{equation*} $$

The other obstacle we must clear is the variance $\sigma^2,$ which also depends on $n_j$. In the same spirit, we establish an upper bound $\sigma^2_+(n, n_i, \epsilon, p)$ on $\sigma^2(n, n_i, n_j, p)$.

$$\begin{equation*} \sigma^2_+ = (n_i-2)\cdot(3p-9p^2 +10p^3 - 4p^4) + n\cdot (p-3p^2 + 6p^3 - 4p^4) + (n-n_i - \epsilon n)\cdot (2p^2-2p^3)\tag{$\textbf{3.2}$} \end{equation*}$$

$\textit{Remark.}$ $\sigma^2_+$ is also comparable to $\sigma^2$ in the limit.
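In the same way, one may evaluate $(\textbf{3.2})$ by hand and compare it with the library's var, whose signature var(n, n_i, epsilon, p) is likewise assumed from its usage in the next cell.

# Illustrative check: evaluate formula (3.2) directly for n_i = 10.
n_i = 10
sigma_2_plus = (n_i - 2)*(3*p - 9*p**2 + 10*p**3 - 4*p**4) \
             + n*(p - 3*p**2 + 6*p**3 - 4*p**4) \
             + (n - n_i - epsilon*n)*(2*p**2 - 2*p**3)
print(sigma_2_plus, var(n, n_i, epsilon, p))  # both ≈ 1.762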

In [15]:
sigma_2 = var(n, k+1, epsilon, p) # 1-based indexing, hence k+1

The result is

$$ \begin{equation*} \mathbb P[\xi - \hat \xi \geq t] \leq \exp\left(-\dfrac{t^2}{2\sigma^2 + \tfrac{2}{3} t}\right) \leq \exp\left(-\dfrac{t^2}{2\sigma^2_+ + \tfrac{2}{3} t}\right) \tag{by $\textbf{3.2}$} \end{equation*} $$

a $\textit{computable}$ exponential upper bound, in the sense that it no longer depends on $n_j.$
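Concretely, with the parameters at hand ($n = 25$, $p = 1/25$, $\epsilon = 0.3$, $n_i = 10$), the checks above give $\sigma^2_+ \approx 1.762$ and $t = 6.8136,$ so the bound evaluates to

$$ \exp\left(-\dfrac{t^2}{2\sigma^2_+ + \tfrac{2}{3}t}\right) = \exp\left(-\dfrac{6.8136^2}{2 \cdot 1.762 + \tfrac{2}{3}\cdot 6.8136}\right) \approx 0.0032, $$

in agreement with the output of the next cell.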

In [16]:
bound = bernstein(sigma_2, t)
print(f"nᵢ = {k+1} | P[ξ̂  deviated from ξ by {t}] ≤ {bound}")
nᵢ = 10 | P[ξ̂  deviated from ξ by 6.8136] ≤ 0.0031667272766464295

The exponential gives an extremely sharp sieve. To obtain the best cut, one finds the $k \in \big[\lceil \epsilon n \rceil, n-1\big]$ for which the observed deviation is $\textit{least unlikely}$, that is, for which the Bernstein bound is largest. For fixed $x,$ we have these possible cuts,

In [17]:
T = np.zeros(n - 1)  # one Bernstein bound per candidate cut, as in the full algorithm below
for k in range(n_min, n - 1):
    sigma_2 = var(n, k+1, epsilon, p); mean = xi(n, k+1, epsilon, p); gap = s[x, k] - s[x, k + 1]
    t = mean - gap # deviation
    if t < 0: 
        continue # skips upper deviations
    else:
        T[k] = bernstein(sigma_2, t)
        print(f"Cut at nᵢ={k+1} | P[deviation] ≤ {T[k]}")
Cut at nᵢ=8 | P[deviation] ≤ 0.019435595420831876
Cut at nᵢ=9 | P[deviation] ≤ 0.44418453353757326
Cut at nᵢ=10 | P[deviation] ≤ 0.0031667272766464295
Cut at nᵢ=11 | P[deviation] ≤ 0.001254667098148025
Cut at nᵢ=12 | P[deviation] ≤ 0.0016898809490244855
Cut at nᵢ=13 | P[deviation] ≤ 0.00019245421859838077
Cut at nᵢ=14 | P[deviation] ≤ 7.475235125052697e-05
Cut at nᵢ=15 | P[deviation] ≤ 0.00010320656237684635
Cut at nᵢ=16 | P[deviation] ≤ 1.114917783263806e-05
Cut at nᵢ=17 | P[deviation] ≤ 4.286896783374554e-06
Cut at nᵢ=18 | P[deviation] ≤ 1.6445639658491365e-06
Cut at nᵢ=19 | P[deviation] ≤ 2.318511263495415e-06
Cut at nᵢ=20 | P[deviation] ≤ 2.407017772362285e-07
Cut at nᵢ=21 | P[deviation] ≤ 9.188096646659168e-08
Cut at nᵢ=22 | P[deviation] ≤ 3.5030222447038494e-08
Cut at nᵢ=23 | P[deviation] ≤ 1.3341350468324918e-08
Cut at nᵢ=24 | P[deviation] ≤ 5.0763714392211695e-09

Then one chooses the cut for which the deviation is $\textit{least unlikely}$.

In [18]:
tau[x] = s[x, np.argmax(T)]
print(f"Scores of {x+1} : {s[x, :]}") # 0-based indexing
print(f"⇒ Cut at nᵢ = {np.argmax(T) + 1} with τ(x) = {tau[x]}") # nᵢ = k+1 converts back to 1-based counting
Scores of 1 : [10.  7.  7.  7.  7.  7.  7.  7.  7.  3.  3.  3.  2.  2.  2.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.]
⇒ Cut at nᵢ = 9 with τ(x) = 7.0

One repeats this procedure for all $x\in X$ to obtain the full algorithm.

Full Algorithm

Below one finds the full code.

In [19]:
def recovery(A, p, epsilon, return_tau=False, draw=True):
    
    n = A.shape[0] 
    n_min = int(np.ceil(epsilon * n)) - 1  # minimum equivalence class size, index-0 counting

    if draw:
        graph(A, title="Noisy Relation") 

    X_ = A.copy()

    # Score and sort
    S_X_ = score(X_)
    s = np.sort(S_X_, axis=1)[:, ::-1]

    # Compute tau thresholds
    tau = np.zeros(n)
    
    # for all x
    for x in range(n):
        T = np.zeros(n - 1)
        # assume nᵢ = k+1
        for k in range(n_min, n - 1): 
            # variance | lemma 3.2
            sigma_2 = var(n, k+1, epsilon, p)
            # expected gap | lemma 3.1
            mean = xi(n, k+1, epsilon, p)
            # measured gap
            gap = s[x, k] - s[x, k + 1]
            # deviation
            t = mean - gap
            
            # upper deviations are uninteresting
            if t < 0:
                continue
            else:
                # apply the sieve
                T[k] = bernstein(sigma_2, t)
        
        # tau stores the score thresholds
        tau[x] = s[x, np.argmax(T)]

    # Recover adjacency matrix
    Adjacency = (S_X_ >= tau[:, np.newaxis]).astype(int)

    if draw:
        graph(Adjacency, title="Recovery")

    return (Adjacency, tau) if return_tau else Adjacency

Let us create an equivalence relation of size $n,$ with $n_i \geq \epsilon n.$ We take $p$ of order $1/n$.

In [20]:
n=20; epsilon=0.3; p=1/n;
X = equivalence(n, epsilon)
graph(X, equivalence=True, title="Equivalence Relation")
[Figure: Equivalence Relation]
N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.

X_ is our perturbed adjacency matrix. We are ready for the recovery algorithm! Feel free to run the cell below multiple times.

In [21]:
X_ = noisy(X, p); A_X_ = recovery(X_, p, epsilon)
[Figure: Noisy Relation]
N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.

[Figure: Recovery]
N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.

This algorithm is asymptotically successful, meaning that the probability of exact recovery tends to one as $n$ grows. It therefore $\textit{almost always}$ works. $\boxed \xi$
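The abstract speaks of an asymptotically vanishing misclassification error. The following sketch shows one way to probe that claim empirically with the functions defined above; misclassification_rate and the trial loop are illustrative additions, not part of the thesis code.

def misclassification_rate(A_true, A_rec):
    """Fraction of off-diagonal pairs on which the recovered relation disagrees with the original."""
    m = A_true.shape[0]
    mask = ~np.eye(m, dtype=bool)
    return np.mean(A_true[mask] != A_rec[mask])

errors = []
for _ in range(20):                                        # a handful of independent trials
    X = equivalence(n, epsilon)                            # fresh relation with the parameters above
    A_rec = recovery(noisy(X, p), p, epsilon, draw=False)
    errors.append(misclassification_rate(A(X), A_rec))
print(f"Mean misclassification error over 20 trials: {np.mean(errors):.4f}")

Increasing $n$ (with $p = 1/n$) should drive this error towards zero, which is precisely the noise-stability claim.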

Preamble

import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import networkx.algorithms.community as nx_comm
from collections import defaultdict

np.set_printoptions(formatter={'float': '{:4}'.format, 'int': '{:4}'.format})
np.set_printoptions(linewidth=1000)

Functions

def equivalence(n, epsilon, seed=None, hard=True):
    """
    Returns a random equivalence relation on {1, ..., n}, represented as a list of blocks,
    each of size at least ceil(epsilon * n). With hard=True, elements are assigned to the
    blocks at random; otherwise the blocks are contiguous.
    """
    
    rng = np.random.default_rng(seed)
    
    # Calculate minimum block size
    m = max(1, math.ceil(epsilon * n))
    m = min(m, n)  # Ensure m doesn't exceed n
    
    # Verify if at least two blocks can be formed
    if n < 2 * m:
        raise ValueError(f"Impossible to form two blocks: n={n} < 2*m={2*m} (m={m}, epsilon={epsilon})")
    
    # Determine maximum partitions possible
    k_max = n // m
    # Randomly choose number of partitions (at least 2)
    k = rng.integers(2, k_max + 1)
    
    # Calculate remaining elements after assigning minimum size
    r = n - k * m
    
    # Generate random composition for block sizes
    if r == 0:
        comp = [0] * k
    else:
        # Stars and bars method for composition
        positions = rng.choice(np.arange(1, r + k), size=k - 1, replace=False)
        positions_sorted = np.sort(positions)
        comp = [positions_sorted[0] - 1]
        for i in range(1, k - 1):
            comp.append(positions_sorted[i] - positions_sorted[i - 1] - 1)
        comp.append((r + k - 1) - positions_sorted[-1])
    
    # Calculate final block sizes
    sizes = [m + c for c in comp]
    
    # Create partitions
    if hard:
        # Assign elements randomly to blocks
        labels = np.repeat(np.arange(k), sizes)
        rng.shuffle(labels)
        blocks = [[] for _ in range(k)]
        for element in range(1, n + 1):
            blocks[labels[element - 1]].append(element)
        return blocks
    else:
        # Create contiguous blocks
        blocks = []
        start = 1
        for size in sizes:
            end = start + size - 1
            blocks.append(list(range(start, end + 1)))
            start = end + 1
        return blocks

def A(X):
    """
    Computes the adjacency matrix of an equivalence relation (X, ~)
    """
    
    n = sum([len(X_i) for X_i in X])
    A_X = np.zeros((n, n), dtype=int)

    for block in X:
        for i in block:
            for j in block:
                A_X[i-1][j-1] = 1  # Convert to 0-based indexing

    return A_X

def noisy(X, p):
    """Flips upper triangle entries (i < j) with probability p."""  
    A_X = A(X)
    n = A_X.shape[0]  # infer the size from the relation itself

    for i in range(n):
        for j in range(i + 1, n):
            if np.random.rand() < p:
                A_X[i, j] = 1 - A_X[i, j]  # Flip the bit
                A_X[j, i] = A_X[i, j]
    return A_X

def score(X, equivalence=False):
    """
    Returns the Score Matrix, as per Lemma 3.3
    """
    # Build the adjacency matrix first
    if equivalence:
        A_X = A(X)
        n = A_X.shape[0]
    else:
        n = X.shape[0]
        A_X = X.copy()

    # (A_X @ A_X)[i, j] sums A_X[i, w] * A_X[w, j] over all w; subtracting 2*A_X removes the
    # contributions of w = i and w = j, and adding the identity corrects the diagonal.
    S_X = (A_X @ A_X) - 2*A_X + np.eye(n)
    
    return S_X

def graph(X, title="", equivalence=False):

    """
    Graphing tool for equivalence classes.
    P.S: The modularity algorithm is used to detect communities
    in the graph, another way to tackle the problem.
    """
    
    # Get adjacency matrix
    if equivalence:
        A_X = A(X)
            
    else:
        A_X = X.copy()

    # Graph using modularity algorithm
    
    np.fill_diagonal(A_X, 0)
    G = nx.from_numpy_array(A_X)
    
    clusters = list(nx_comm.greedy_modularity_communities(G))
        
    pos = {}
    canvas_width = 100  
    offset = canvas_width / len(clusters)

    for i, cluster in enumerate(clusters):
        cluster = list(cluster)
        n = len(cluster)
        radius = 7
        angle_step = 2 * np.pi / n

        for j, node in enumerate(cluster):
            angle = j * angle_step
            x = radius * np.cos(angle) + i * offset
            y = radius * np.sin(angle)
            pos[node] = np.array([x, y])

    # Plot results

    plt.figure(figsize=(15, 2.25))  # canvas size control
    node_labels = {i: i + 1 for i in G.nodes} # 1-based indexing
    nx.draw(G, pos=pos, labels=node_labels, with_labels=True, node_color='lightblue', node_size=200)
    plt.title(title, fontsize=18)
    plt.show()
    print("N.B: Reflexivity i.e. self-connectedness does not reflect in this graph.\n")

def matrix(M1, M2=None, width=2, nan_repr='·', title=None, spacing=2, upper_1=False, upper_2=False):
    """
    Pretty-prints one/two 2D numpy arrays side by side with 
    optional alignment and NaN formatting.
    """
    if title:
        print(title)
    if upper_1:
        M1 = M1.astype(float)
        n = M1.shape[0]
        M1[np.tril_indices(n, k=-1)] = np.nan
        
    if upper_2 and M2 is not None:
        M2 = M2.astype(float)
        n = M2.shape[0]
        M2[np.tril_indices(n, k=-1)] = np.nan

    # Find the number of rows (assuming both matrices have the same number of rows)
    rows = max(M1.shape[0], M2.shape[0] if M2 is not None else 0)
    
    for i in range(rows):
        row_str = ""
        
        # Print from M1
        for j in range(M1.shape[1]):
            val = M1[i, j] if i < M1.shape[0] else np.nan
            if np.isnan(val):
                row_str += f"{nan_repr:>{width}}"
            elif isinstance(val, float) and val.is_integer():
                row_str += f"{int(val):{width}d}"
            else:
                row_str += f"{val:{width}}"
        
        # Add space between matrices
        row_str += ' ' * spacing

        # If M2 is provided, print from M2
        if M2 is not None:
            for j in range(M2.shape[1]):
                val = M2[i, j] if i < M2.shape[0] else np.nan
                if np.isnan(val):
                    row_str += f"{nan_repr:>{width}}"
                elif isinstance(val, float) and val.is_integer():
                    row_str += f"{int(val):{width}d}"
                else:
                    row_str += f"{val:{width}}"
        
        print(row_str)

def xi(n, n_i, epsilon, p):
    """
    Computes an upper bound ξ₊ for the expected gap ξ, as per Lemma 3.1
    """
    return (n_i-2) - (3*n_i+epsilon*n-6)*p + (2*n-4)*(p**2)

def var(n, n_i, epsilon, p):
    """
    Computes an upper bound σ²₊ for variance σ², as per Lemma 3.2
    """

    return (n_i - 2)*(3*p -9*p**2+10*p**3 -4*p**4)+ n*(p-3*p**2+6*p**3 - 4*p**4) + (n-n_i-epsilon*n)*(2*p**2-2*p**3)

def bernstein(sigma_2, t):
    """
    Returns the Bernstein concentration bound.
    """
    return np.exp(-(t**2)/(2*sigma_2 + (2/3)*t))