Source code for pygip.models.attack.mea.MEA
import os
import random
import time
import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from dgl import DGLGraph
from tqdm import tqdm
from pygip.models.attack.base import BaseAttack
from pygip.models.nn import GCN, ShadowNet, AttackNet
from pygip.utils.metrics import GraphNeuralNetworkMetric
class ModelExtractionAttack(BaseAttack):
    """
    Shared setup for the model extraction attacks defined in this module.

    The constructor moves the dataset graph to ``self.device``, caches the
    graph's node data (``'feat'``, ``'label'``, ``'train_mask'``,
    ``'test_mask'``) and the dataset meta data on the instance, and prepares
    the target model ``self.net1``: trained from scratch when ``model_path``
    is ``None``, otherwise restored from that checkpoint.

    Parameters
    ----------
    dataset
        Object exposing ``graph_data``, ``num_nodes``, ``num_features`` and
        ``num_classes``.
    attack_node_fraction : float
        Fraction of the nodes available to the attacker; stored as
        ``attack_node_num = int(num_nodes * attack_node_fraction)``.
    model_path : str, optional
        Path of a saved target-model ``state_dict``.
    alpha : float, optional
        Stored unchanged as ``self.alpha`` for use by subclasses.
    """
    supported_api_types = {"dgl"}
    supported_datasets = {"Cora", "CiteSeer", "PubMed", "Photo", "Computers", "CoauthorCS", "CoauthorPhysics"}

    def __init__(self, dataset, attack_node_fraction, model_path=None, alpha=0.8):
        # NOTE(review): ``self.device`` is not set in this class; it is
        # presumably provided by ``BaseAttack.__init__`` -- confirm.
        super().__init__(dataset, attack_node_fraction, model_path)
        self.alpha = alpha

        self.graph = dataset.graph_data.to(self.device)
        self.features = self.graph.ndata['feat']
        self.labels = self.graph.ndata['label']
        self.train_mask = self.graph.ndata['train_mask']
        self.test_mask = self.graph.ndata['test_mask']

        # meta data
        self.num_nodes = dataset.num_nodes
        self.num_features = dataset.num_features
        self.num_classes = dataset.num_classes

        # attack params
        self.attack_node_num = int(dataset.num_nodes * attack_node_fraction)

        if model_path is None:
            self._train_target_model()
        else:
            self._load_model(model_path)

    def _train_target_model(self):
        """
        Train the target model (GCN) on the original graph.

        Runs 200 full-graph epochs of Adam (lr=0.01, weight_decay=5e-4),
        minimising the negative log-likelihood on the nodes selected by
        ``self.train_mask``.

        Returns
        -------
        The trained model. It is also stored as ``self.net1`` and left in
        evaluation mode, matching what ``_load_model`` returns.
        """
        self.net1 = GCN(self.num_features, self.num_classes).to(self.device)
        optimizer = torch.optim.Adam(self.net1.parameters(), lr=0.01, weight_decay=5e-4)

        self.net1.train()
        for _ in range(200):
            logits = self.net1(self.graph, self.features)
            logp = F.log_softmax(logits, dim=1)
            loss = F.nll_loss(logp[self.train_mask], self.labels[self.train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # The target model is only queried from here on.
        self.net1.eval()
        return self.net1

    def _load_model(self, model_path):
        """
        Load a pre-trained model from a file.

        Parameters
        ----------
        model_path : str
            Path of a ``state_dict`` saved for a
            ``GCN(num_features, num_classes)`` model.

        Returns
        -------
        The restored model in evaluation mode, also stored as ``self.net1``.
        """
        self.net1 = GCN(self.num_features, self.num_classes).to(self.device)
        # map_location lets a checkpoint saved on another device (e.g. CUDA)
        # be restored on this one.
        # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
        state_dict = torch.load(model_path, map_location=self.device)
        self.net1.load_state_dict(state_dict)
        self.net1.eval()
        return self.net1
class ModelExtractionAttack0(ModelExtractionAttack):
    """
    ModelExtractionAttack0.

    Queries the target model on a random subset of nodes, synthesizes
    features for the nodes within two hops of that subset, and trains a
    surrogate GCN on the induced sub-graph using the target's predictions
    as labels.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction, model_path=None, alpha=0.8):
        super().__init__(dataset, attack_node_fraction, model_path)
        self.alpha = alpha

    def _synthesize_query_features(self, g_matrix, sub_graph_node_index):
        """
        Build the feature matrix used to query the target model.

        Parameters
        ----------
        g_matrix : numpy.ndarray
            Dense adjacency matrix of the full graph.
        sub_graph_node_index : list of int
            Nodes whose real features the attacker knows.

        Returns
        -------
        (features_query, sub_graph_syn_node_index)
            A copy of ``self.features`` in which every synthetic node (within
            two hops of a known node, but not itself known) has its row
            replaced by a weighted sum of the features of known nodes:
            weight ``alpha`` for known direct neighbours and ``1 - alpha``
            for known two-hop neighbours. Also returns the sorted list of
            synthetic nodes.
        """
        known_nodes = set(sub_graph_node_index)
        neighbour_cache = {}

        def neighbours_of(node):
            # Every lookup scans one dense adjacency row, so memoise it.
            # NOTE(review): assumes get_nonzero_indices (not defined in this
            # class) is a pure function of the row it is given -- confirm.
            if node not in neighbour_cache:
                neighbour_cache[node] = self.get_nonzero_indices(g_matrix[node]).tolist()
            return neighbour_cache[node]

        # Collect everything within two hops of the known nodes.
        reachable = set()
        for node_index in sub_graph_node_index:
            first_ring = neighbours_of(node_index)
            reachable.update(first_ring)
            for first_order_node_index in first_ring:
                reachable.update(neighbours_of(first_order_node_index))
        sub_graph_syn_node_index = sorted(reachable - known_nodes)

        features_query = self.features.clone()
        batch_size = 32
        # Work in batches only so the CUDA cache can be released periodically.
        for start in range(0, len(sub_graph_syn_node_index), batch_size):
            for node_index in sub_graph_syn_node_index[start:start + batch_size]:
                features_query[node_index] = 0

                # Contribution of known direct neighbours.
                one_step = set(neighbours_of(node_index)) & known_nodes
                num_one_step = len(one_step)
                for first_order_node_index in one_step:
                    this_node_degree = len(neighbours_of(first_order_node_index))
                    features_query[node_index] += (
                        self.features[first_order_node_index] * self.alpha /
                        (num_one_step * this_node_degree) ** 0.5
                    )

                # Contribution of known nodes exactly two hops away.
                two_step = set()
                for first_order_node_index in one_step:
                    two_step.update(neighbours_of(first_order_node_index))
                two_step = (two_step - one_step) & known_nodes
                num_two_step = len(two_step)
                for second_order_node_index in two_step:
                    first_ring = set(neighbours_of(second_order_node_index))
                    second_ring = set()
                    for neighbour in first_ring:
                        second_ring.update(neighbours_of(neighbour))
                    this_node_second_degree = len(second_ring - first_ring)
                    if this_node_second_degree > 0:
                        features_query[node_index] += (
                            self.features[second_order_node_index] * (1 - self.alpha) /
                            (num_two_step * this_node_second_degree) ** 0.5
                        )
            torch.cuda.empty_cache()

        return features_query, sub_graph_syn_node_index

    def attack(self):
        """
        Main attack procedure.

        1. Samples a subset of nodes (`sub_graph_node_index`) for querying.
        2. Synthesizes features for neighboring nodes and their neighbors.
        3. Builds a sub-graph, trains a new GCN on it, and evaluates
           fidelity & accuracy w.r.t. the target model.

        Side effects: rewrites ``self.train_mask`` / ``self.test_mask`` in
        place (sampled nodes become training nodes, all others test nodes),
        prints the best fidelity/accuracy seen over the epochs, and stores
        the surrogate model as ``self.net2``.

        Raises
        ------
        RuntimeError
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        try:
            torch.cuda.empty_cache()
            g_matrix = self.graph.adjacency_matrix().to_dense().cpu().numpy()

            sub_graph_node_index = np.random.choice(
                self.num_nodes, self.attack_node_num, replace=False).tolist()

            features_query, sub_graph_syn_node_index = self._synthesize_query_features(
                g_matrix, sub_graph_node_index)
            total_sub_nodes = sorted(set(sub_graph_syn_node_index) | set(sub_graph_node_index))

            # Update masks in place: sampled nodes train, everything else tests.
            known_index = torch.tensor(sub_graph_node_index, dtype=torch.long, device=self.device)
            self.train_mask[:] = 0
            self.train_mask[known_index] = 1
            self.test_mask[:] = 1
            self.test_mask[known_index] = 0

            # Adjacency of the sub-graph induced by the known + synthetic nodes.
            sub_adjacency = g_matrix[np.ix_(total_sub_nodes, total_sub_nodes)].astype(np.float64)
            del g_matrix

            sub_train_mask = self.train_mask[total_sub_nodes]
            sub_features = features_query[total_sub_nodes]

            # Get query labels
            self.net1.eval()
            with torch.no_grad():
                g = self.graph.to(self.device)
                logits_query = self.net1(g, features_query)
                _, labels_query = torch.max(logits_query, dim=1)
                sub_labels_query = labels_query[total_sub_nodes]
                del logits_query

            # Create DGL graph with exactly one self-loop per node
            sub_g = nx.from_numpy_array(sub_adjacency)
            sub_g.remove_edges_from(nx.selfloop_edges(sub_g))
            sub_g.add_edges_from(zip(sub_g.nodes(), sub_g.nodes()))
            sub_g = DGLGraph(sub_g).to(self.device)
            norm = torch.pow(sub_g.in_degrees().float(), -0.5)
            norm[torch.isinf(norm)] = 0
            sub_g.ndata['norm'] = norm.to(self.device).unsqueeze(1)

            # Train extraction model
            net = GCN(self.num_features, self.num_classes).to(self.device)
            optimizer = torch.optim.Adam(net.parameters(), lr=1e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            print("=========Model Extracting==========================")
            for epoch in tqdm(range(200)):
                net.train()
                logits = net(sub_g, sub_features)
                logp = F.log_softmax(logits, dim=1)
                loss = F.nll_loss(logp[sub_train_mask], sub_labels_query[sub_train_mask])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Evaluate on the full graph with the real features.
                with torch.no_grad():
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net, g, self.features, self.test_mask, self.labels, labels_query
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print("========================Final results:=========================================")
            print(best_performance_metrics)
            self.net2 = net
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
class ModelExtractionAttack1(ModelExtractionAttack):
    """
    ModelExtractionAttack1.

    Trains a shadow model from pre-generated files stored under
    ``data/attack2_generated_graph/<dataset class name, lower-cased>/`` next
    to this module:

    * ``selected_index.txt`` -- one attack node id per line;
    * ``query_labels.txt``   -- ``<node_id> <label>`` per line;
    * ``graph_label.txt``    -- ``<src> <dst>`` per line, edges of the shadow
      graph.

    The number of attack nodes is fixed to 700 regardless of
    ``attack_node_fraction``.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction):
        super().__init__(dataset, attack_node_fraction)
        self.attack_node_num = 700

        current_dir = os.path.dirname(os.path.abspath(__file__))
        generated_graph_dataset_path = os.path.join(current_dir, 'data', 'attack2_generated_graph',
                                                    dataset.__class__.__name__.lower())
        self.selected_node_file = os.path.join(generated_graph_dataset_path, "selected_index.txt")
        self.query_label_file = os.path.join(generated_graph_dataset_path, "query_labels.txt")
        self.shadow_graph_file = os.path.join(generated_graph_dataset_path, "graph_label.txt")

    def _build_normalized_graph(self, adjacency):
        """
        Convert a dense adjacency matrix into a DGL graph on ``self.device``.

        Existing self-loops are removed and exactly one self-loop is added
        per node. ``ndata['norm']`` holds ``in_degree ** -0.5`` as a column
        vector, with infinities replaced by 0.
        """
        nx_graph = nx.from_numpy_array(adjacency)
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
        graph = DGLGraph(nx_graph).to(self.device)
        norm = torch.pow(graph.in_degrees().float(), -0.5)
        norm[torch.isinf(norm)] = 0
        graph.ndata['norm'] = norm.to(self.device).unsqueeze(1)
        return graph

    def attack(self):
        """
        Main attack procedure.

        1. Reads selected nodes from file for training (attack) nodes.
        2. Reads query labels from another file.
        3. Builds a shadow graph from the given adjacency matrix file.
        4. Trains a shadow model on the selected nodes, then evaluates
           fidelity & accuracy against the original target graph.

        Side effects: rewrites ``self.train_mask`` / ``self.test_mask`` in
        place (attack nodes become training nodes, all others test nodes),
        prints the best metrics, and stores the shadow model as ``self.net2``.

        Raises
        ------
        RuntimeError
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        try:
            torch.cuda.empty_cache()

            with open(self.selected_node_file, "r") as selected_node_file:
                attack_nodes = [int(line) for line in selected_node_file if line.strip()]
            attack_node_set = set(attack_nodes)
            attack_features = self.features[attack_nodes]

            # Update masks in place: attack nodes train, everything else tests.
            attack_index = torch.tensor(attack_nodes, dtype=torch.long, device=self.device)
            self.train_mask[:] = 0
            self.train_mask[attack_index] = 1
            self.test_mask[:] = 1
            self.test_mask[attack_index] = 0

            # NOTE(review): labels are collected in file order, so this assumes
            # query_labels.txt lists nodes in node-id order and that attack
            # nodes appear in the same order as in selected_index.txt -- confirm.
            all_query_labels = []
            attack_query = []
            with open(self.query_label_file, "r") as query_label_file:
                for line in query_label_file:
                    if not line.strip():
                        continue
                    node_id, label = map(int, line.split())
                    all_query_labels.append(label)
                    if node_id in attack_node_set:
                        attack_query.append(label)
            attack_query = torch.LongTensor(attack_query).to(self.device)
            all_query_labels = torch.LongTensor(all_query_labels).to(self.device)

            # Shadow graph: symmetric adjacency over the attack nodes.
            adj_matrix = np.zeros((self.attack_node_num, self.attack_node_num))
            with open(self.shadow_graph_file, "r") as shadow_graph_file:
                for line in shadow_graph_file:
                    if not line.strip():
                        continue
                    src, dst = map(int, line.split())
                    adj_matrix[src][dst] = 1
                    adj_matrix[dst][src] = 1
            sub_g = self._build_normalized_graph(adj_matrix)

            # Target graph, rebuilt with the same self-loop/norm treatment.
            target_adjacency = self.graph.adjacency_matrix().to_dense().cpu().numpy()
            sub_g_b = self._build_normalized_graph(target_adjacency)

            net = ShadowNet(self.num_features, self.num_classes).to(self.device)
            optimizer = torch.optim.Adam(net.parameters(), lr=1e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            print("===================Model Extracting================================")
            for epoch in tqdm(range(200)):
                net.train()
                logits = net(sub_g, attack_features)
                logp = F.log_softmax(logits, dim=1)
                loss = F.nll_loss(logp, attack_query)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                with torch.no_grad():
                    # Ground-truth labels first, queried labels second, the
                    # same order every other attack in this module uses.
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net, sub_g_b, self.features, self.test_mask,
                        self.labels, all_query_labels
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print(best_performance_metrics)
            self.net2 = net
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
class ModelExtractionAttack2(ModelExtractionAttack):
    """
    ModelExtractionAttack2.

    A strategy that randomly samples a fraction of nodes as attack nodes,
    synthesizes identity features for all nodes, then trains an extraction
    model. The leftover nodes become test nodes.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction, model_path=None):
        super().__init__(dataset, attack_node_fraction, model_path)

    def attack(self):
        """
        Main attack procedure.

        1. Randomly select `attack_node_num` distinct nodes as training nodes.
        2. Set up synthetic features as identity vectors for all nodes.
        3. Train an `AttackNet` model on these nodes with the queried labels.
        4. Evaluate fidelity & accuracy on a subset of leftover nodes (the
           first 1000 non-attack nodes in node-id order).

        Side effects: rewrites ``self.train_mask`` / ``self.test_mask`` in
        place, writes ``'norm'`` into the graph's node data, prints the best
        metrics, and stores the extraction model as ``self.net2``.

        Raises
        ------
        RuntimeError
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        max_test_nodes = 1000
        try:
            torch.cuda.empty_cache()

            # random.sample draws without replacement, so exactly the requested
            # number of distinct nodes is selected (capped at the graph size).
            sample_size = min(self.attack_node_num, self.num_nodes)
            attack_nodes = random.sample(range(self.num_nodes), sample_size)
            attack_node_set = set(attack_nodes)
            test_nodes = [
                node for node in range(self.num_nodes) if node not in attack_node_set
            ][:max_test_nodes]

            # Update masks in place; nodes in neither set are left out of both.
            attack_index = torch.tensor(attack_nodes, dtype=torch.long, device=self.device)
            test_index = torch.tensor(test_nodes, dtype=torch.long, device=self.device)
            self.train_mask[:] = 0
            self.train_mask[attack_index] = 1
            self.test_mask[:] = 0
            self.test_mask[test_index] = 1

            # Labels the target model assigns to every node.
            self.net1.eval()
            with torch.no_grad():
                logits_query = self.net1(self.graph, self.features)
                _, labels_query = torch.max(logits_query, dim=1)

            # One-hot "identity" features: the attacker knows no real features.
            syn_features = torch.eye(self.num_nodes, dtype=torch.float32, device=self.device)

            g = self.graph.to(self.device)
            norm = torch.pow(g.in_degrees().float(), -0.5)
            norm[torch.isinf(norm)] = 0
            g.ndata['norm'] = norm.to(self.device).unsqueeze(1)

            net_attack = AttackNet(self.num_nodes, self.num_classes).to(self.device)
            optimizer_original = torch.optim.Adam(net_attack.parameters(), lr=5e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            train_mask = self.train_mask.to(self.device)
            test_mask = self.test_mask.to(self.device)
            labels = self.labels.to(self.device)
            labels_query = labels_query.to(self.device)

            print("=========Model Extracting==========================")
            for epoch in tqdm(range(200)):
                net_attack.train()
                logits = net_attack(g, syn_features)
                logp = F.log_softmax(logits, 1)
                loss = F.nll_loss(logp[train_mask], labels_query[train_mask])

                optimizer_original.zero_grad()
                loss.backward()
                optimizer_original.step()

                with torch.no_grad():
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net_attack, g, syn_features,
                        test_mask, labels, labels_query
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print("========================Final results:=========================================")
            print(best_performance_metrics)
            # Keep the extracted model, as attacks 0 and 1 do.
            self.net2 = net_attack
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
class ModelExtractionAttack3(ModelExtractionAttack):
    """
    ModelExtractionAttack3.

    A more complex extraction strategy that uses a "shadow graph index"
    file to build partial subgraphs and merges them. It queries selected
    nodes from a potential set and forms a combined adjacency matrix.

    Index files are read from
    ``data/attack3_shadow_graph/<dataset class name, lower-cased>/`` next to
    this module, one integer node id per line.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction, model_path=None):
        super().__init__(dataset, attack_node_fraction, model_path)

    @staticmethod
    def _read_node_indices(path):
        """Read one integer node id per non-blank line of *path*."""
        with open(os.path.abspath(path), 'r') as index_file:
            return [int(line) for line in index_file if line.strip()]

    def _build_normalized_graph(self, adjacency):
        """
        Convert a dense adjacency matrix into a DGL graph on ``self.device``.

        Existing self-loops are removed and exactly one self-loop is added
        per node. ``ndata['norm']`` holds ``in_degree ** -0.5`` as a column
        vector, with infinities replaced by 0.
        """
        nx_graph = nx.from_numpy_array(adjacency)
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
        graph = DGLGraph(nx_graph).to(self.device)
        norm = torch.pow(graph.in_degrees().float(), -0.5)
        norm[torch.isinf(norm)] = 0
        graph.ndata['norm'] = norm.to(self.device).unsqueeze(1)
        return graph

    def attack(self):
        """
        Main attack procedure.

        Steps:
        1. Loads indices for two subgraphs from text files.
        2. Selects `attack_node_num` distinct nodes from the target subgraph
           index (``sub_graph_index_b``).
        3. Merges subgraph adjacency matrices (block-diagonal: attack nodes,
           then shadow nodes) and constructs a new graph with combined
           features.
        4. Trains a new GCN and evaluates fidelity & accuracy w.r.t. the
           original target.

        Side effects: prints the best metrics and stores the trained model
        as ``self.net2``.

        Raises
        ------
        ValueError
            If `attack_node_num` exceeds the number of distinct candidate
            nodes.
        RuntimeError, Exception
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        labelled_node_limit = 140
        try:
            torch.cuda.empty_cache()
            g_numpy = self.graph.adjacency_matrix().to_dense().cpu().numpy()

            current_dir = os.path.dirname(os.path.abspath(__file__))
            shadow_graph_dataset_path = os.path.join(current_dir, 'data', 'attack3_shadow_graph',
                                                     self.dataset.__class__.__name__.lower())
            sub_graph_index_b = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'attack_6_sub_shadow_graph_index_attack_2.txt'))
            sub_graph_index_a = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'protential_1300_shadow_graph_index.txt'))

            # Draw distinct attack nodes. Sampling without replacement cannot
            # loop forever when more nodes are requested than exist.
            candidates = list(dict.fromkeys(sub_graph_index_b))
            if self.attack_node_num > len(candidates):
                raise ValueError(
                    f"Cannot draw {self.attack_node_num} distinct attack nodes "
                    f"from {len(candidates)} candidate nodes")
            attack_node = random.sample(candidates, self.attack_node_num)

            attack_features = self.features[attack_node].to(self.device)
            attack_labels = self.labels[attack_node].to(self.device)
            shadow_features = self.features[sub_graph_index_a].to(self.device)
            shadow_labels = self.labels[sub_graph_index_a].to(self.device)

            # Block-diagonal adjacency: attack sub-graph first, shadow second.
            attack_adjacency = np.array(g_numpy[np.ix_(attack_node, attack_node)])
            shadow_adjacency = np.array(g_numpy[np.ix_(sub_graph_index_a, sub_graph_index_a)])
            zeros_1 = np.zeros((len(attack_node), len(sub_graph_index_a)))
            zeros_2 = np.zeros((len(sub_graph_index_a), len(attack_node)))
            generated_graph = np.concatenate((
                np.concatenate((attack_adjacency, zeros_1), axis=1),
                np.concatenate((zeros_2, shadow_adjacency), axis=1),
            ), axis=0)

            generated_features = torch.cat((attack_features, shadow_features), dim=0).to(self.device)
            generated_labels = torch.cat((attack_labels, shadow_labels), dim=0).to(self.device)
            generated_train_mask = torch.ones(len(generated_features), dtype=torch.bool, device=self.device)
            generated_g = self._build_normalized_graph(generated_graph)

            # Evaluation sub-graph induced by the target index list.
            sub_graph_g_b = g_numpy[np.ix_(sub_graph_index_b, sub_graph_index_b)]
            sub_graph_features_b = self.features[sub_graph_index_b].to(self.device)
            sub_graph_labels_b = self.labels[sub_graph_index_b].to(self.device)
            sub_graph_test_mask_b = self.test_mask[sub_graph_index_b].to(self.device)

            # Over the positions both masks share, the first 140 are training
            # positions and the rest are excluded from training. Every
            # evaluation position after the training prefix is tested.
            overlap = min(len(sub_graph_test_mask_b), len(generated_train_mask))
            labelled = min(labelled_node_limit, overlap)
            generated_train_mask[labelled:overlap] = 0
            sub_graph_test_mask_b[:labelled] = 0
            sub_graph_test_mask_b[labelled:] = 1

            sub_g_b = self._build_normalized_graph(sub_graph_g_b)

            # Labels the target model assigns on the evaluation sub-graph.
            self.net1.eval()
            with torch.no_grad():
                logits_b = self.net1(sub_g_b, sub_graph_features_b)
                _, query_b = torch.max(logits_b, dim=1)

            net2 = GCN(self.num_features, self.num_classes).to(self.device)
            optimizer_a = torch.optim.Adam(net2.parameters(), lr=1e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            print("=========Model Extracting==========================")
            for epoch in tqdm(range(300)):
                net2.train()
                logits_a = net2(generated_g, generated_features)
                logp_a = F.log_softmax(logits_a, 1)
                loss_a = F.nll_loss(logp_a[generated_train_mask], generated_labels[generated_train_mask])

                optimizer_a.zero_grad()
                loss_a.backward()
                optimizer_a.step()

                with torch.no_grad():
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net2, sub_g_b, sub_graph_features_b,
                        sub_graph_test_mask_b, sub_graph_labels_b, query_b
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print("========================Final results:=========================================")
            print(best_performance_metrics)
            # Keep the extracted model, as attacks 0 and 1 do.
            self.net2 = net2
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            print(f"Error type: {type(e)}")
            torch.cuda.empty_cache()
            raise
class ModelExtractionAttack4(ModelExtractionAttack):
    """
    ModelExtractionAttack4.

    Another graph-based strategy that reads node indices from files,
    merges adjacency matrices, and links new edges based on feature similarity.

    Index files are read from
    ``data/attack3_shadow_graph/<dataset class name, lower-cased>/`` next to
    this module, one integer node id per line.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction, model_path=None):
        super().__init__(dataset, attack_node_fraction, model_path)
        self.model_path = model_path

    @staticmethod
    def _read_node_indices(path):
        """Read one integer node id per non-blank line of *path*."""
        with open(os.path.abspath(path), 'r') as index_file:
            return [int(line) for line in index_file if line.strip()]

    def _build_normalized_graph(self, adjacency):
        """
        Convert a dense adjacency matrix into a DGL graph on ``self.device``.

        Existing self-loops are removed and exactly one self-loop is added
        per node. ``ndata['norm']`` holds ``in_degree ** -0.5`` as a column
        vector, with infinities replaced by 0.
        """
        nx_graph = nx.from_numpy_array(adjacency)
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
        graph = DGLGraph(nx_graph).to(self.device)
        norm = torch.pow(graph.in_degrees().float(), -0.5)
        norm[torch.isinf(norm)] = 0
        graph.ndata['norm'] = norm.to(self.device).unsqueeze(1)
        return graph

    def attack(self):
        """
        Main attack procedure.

        1. Reads two sets of node indices from text files.
        2. Selects a fixed number (60) of distinct nodes from the target set
           for attack.
        3. Builds a combined adjacency matrix with zero blocks, then populates
           edges between shadow and attack nodes based on a distance threshold.
        4. Trains a new GCN on this combined graph and evaluates fidelity & accuracy.

        Side effects: prints the best metrics (and ``one isolated node!`` for
        every attack node that could not be linked) and stores the trained
        model as ``self.net2``.

        Raises
        ------
        ValueError
            If fewer than 60 distinct target nodes are available, or if the
            sampled shadow nodes have no edges to derive a threshold from.
        RuntimeError, Exception
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        attack_node_arg = 60
        distance_sample_rows = 100
        link_attempts = 1000
        test_start = 300
        try:
            torch.cuda.empty_cache()
            g_numpy = self.graph.adjacency_matrix().to_dense().cpu().numpy()

            current_dir = os.path.dirname(os.path.abspath(__file__))
            shadow_graph_dataset_path = os.path.join(current_dir, 'data', 'attack3_shadow_graph',
                                                     self.dataset.__class__.__name__.lower())
            sub_graph_index_b = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'target_graph_index.txt'))
            sub_graph_index_a = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'protential_1200_shadow_graph_index.txt'))

            # Sampling without replacement cannot loop forever when the
            # target set holds fewer distinct nodes than requested.
            candidates = list(dict.fromkeys(sub_graph_index_b))
            if attack_node_arg > len(candidates):
                raise ValueError(
                    f"Cannot draw {attack_node_arg} distinct attack nodes "
                    f"from {len(candidates)} candidate nodes")
            attack_node = random.sample(candidates, attack_node_arg)

            attack_features = self.features[attack_node].cpu().numpy()
            attack_labels = self.labels[attack_node].cpu().numpy()
            shadow_features = self.features[sub_graph_index_a].cpu().numpy()
            shadow_labels = self.labels[sub_graph_index_a].cpu().numpy()

            # Attack nodes start with no edges; shadow nodes keep their own.
            sub_graph_g_a = np.array(g_numpy[sub_graph_index_a][:, sub_graph_index_a])
            num_attack = len(attack_node)
            num_shadow = len(sub_graph_index_a)
            generated_graph = np.block([
                [np.zeros((num_attack, num_attack)), np.zeros((num_attack, num_shadow))],
                [np.zeros((num_shadow, num_attack)), sub_graph_g_a]
            ])

            # Feature distances along the edges of the first shadow rows give
            # the linking thresholds.
            distance = []
            for row in range(min(distance_sample_rows, num_shadow)):
                for column in np.nonzero(sub_graph_g_a[row])[0].tolist():
                    distance.append(float(np.linalg.norm(
                        shadow_features[row] - shadow_features[int(column)])))
            if not distance:
                raise ValueError(
                    "Cannot derive a distance threshold: the sampled shadow nodes have no edges")
            threshold = np.mean(distance)
            max_threshold = max(distance)

            generated_features = np.vstack((attack_features, shadow_features))
            generated_labels = np.concatenate([attack_labels, shadow_labels])

            # Link each attack node to one randomly drawn shadow node that is
            # close in feature space; after 500 failed draws the looser
            # ``max_threshold`` is accepted as well.
            for i in range(num_attack):
                for loop in range(link_attempts):
                    j = num_attack + random.randint(0, num_shadow - 1)
                    dist = np.linalg.norm(generated_features[i] - generated_features[j])
                    if dist < threshold or (loop > 500 and dist < max_threshold):
                        generated_graph[i][j] = 1
                        generated_graph[j][i] = 1
                        break
                else:
                    print("one isolated node!")

            generated_train_mask = torch.ones(len(generated_features), dtype=torch.bool).to(self.device)
            generated_features = torch.FloatTensor(generated_features).to(self.device)
            generated_labels = torch.LongTensor(generated_labels).to(self.device)
            generated_g = self._build_normalized_graph(generated_graph)

            # Evaluation sub-graph induced by the target index list; every
            # position from 300 onwards is a test node.
            sub_graph_g_b = np.array(g_numpy[sub_graph_index_b][:, sub_graph_index_b])
            sub_graph_features_b = self.features[sub_graph_index_b].to(self.device)
            sub_graph_labels_b = self.labels[sub_graph_index_b].to(self.device)
            sub_graph_test_mask_b = self.test_mask[sub_graph_index_b].to(self.device)
            sub_graph_test_mask_b[:test_start] = 0
            sub_graph_test_mask_b[test_start:] = 1
            sub_g_b = self._build_normalized_graph(sub_graph_g_b)

            # Labels the target model assigns on the evaluation sub-graph.
            self.net1.eval()
            with torch.no_grad():
                logits_b = self.net1(sub_g_b, sub_graph_features_b)
                _, query_b = torch.max(logits_b, dim=1)

            net2 = GCN(self.num_features, self.num_classes).to(self.device)
            optimizer_a = torch.optim.Adam(net2.parameters(), lr=1e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            print("=========Model Extracting==========================")
            for epoch in tqdm(range(300)):
                net2.train()
                logits_a = net2(generated_g, generated_features)
                logp_a = F.log_softmax(logits_a, 1)
                loss_a = F.nll_loss(logp_a[generated_train_mask], generated_labels[generated_train_mask])

                optimizer_a.zero_grad()
                loss_a.backward()
                optimizer_a.step()

                with torch.no_grad():
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net2, sub_g_b, sub_graph_features_b,
                        sub_graph_test_mask_b, sub_graph_labels_b, query_b
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print("========================Final results:=========================================")
            print(best_performance_metrics)
            # Keep the extracted model, as attacks 0 and 1 do.
            self.net2 = net2
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            print(f"Error type: {type(e)}")
            torch.cuda.empty_cache()
            raise
class ModelExtractionAttack5(ModelExtractionAttack):
    """
    ModelExtractionAttack5.

    Similar to ModelExtractionAttack4, but uses a slightly different
    strategy to link edges between nodes based on a threshold distance.

    Index files are read from
    ``data/attack3_shadow_graph/<dataset class name, lower-cased>/`` next to
    this module, one integer node id per line.

    Inherits
    --------
    ModelExtractionAttack
    """

    def __init__(self, dataset, attack_node_fraction, model_path=None):
        super().__init__(dataset, attack_node_fraction, model_path)
        self.model_path = model_path

    @staticmethod
    def _read_node_indices(path):
        """Read one integer node id per non-blank line of *path*."""
        with open(os.path.abspath(path), 'r') as index_file:
            return [int(line) for line in index_file if line.strip()]

    def _build_normalized_graph(self, adjacency):
        """
        Convert a dense adjacency matrix into a DGL graph on ``self.device``.

        Existing self-loops are removed and exactly one self-loop is added
        per node. ``ndata['norm']`` holds ``in_degree ** -0.5`` as a column
        vector, with infinities replaced by 0.
        """
        nx_graph = nx.from_numpy_array(adjacency)
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
        graph = DGLGraph(nx_graph).to(self.device)
        norm = torch.pow(graph.in_degrees().float(), -0.5)
        norm[torch.isinf(norm)] = 0
        graph.ndata['norm'] = norm.to(self.device).unsqueeze(1)
        return graph

    def _link_attack_nodes(self, generated_graph, generated_features, num_attack,
                           num_shadow, threshold, max_threshold):
        """
        Connect each attack node to one feature-wise close shadow node.

        For every attack node, up to 1000 shadow nodes are drawn at random.
        The first one closer than ``threshold`` is linked (symmetrically, in
        place in ``generated_graph``). After 500 failed draws a node closer
        than ``max_threshold`` is accepted too. Nodes that stay unlinked are
        reported with ``one isolated node!``.
        """
        for attack_row in range(num_attack):
            for loop in range(1000):
                shadow_row = num_attack + random.randint(0, num_shadow - 1)
                feat_diff = generated_features[attack_row] - generated_features[shadow_row]
                dist = np.linalg.norm(feat_diff)
                if dist < threshold or (loop > 500 and dist < max_threshold):
                    generated_graph[attack_row][shadow_row] = 1
                    generated_graph[shadow_row][attack_row] = 1
                    break
            else:
                print("one isolated node!")

    def attack(self):
        """
        Main attack procedure.

        1. Reads two sets of node indices (for target and shadow nodes).
        2. Builds a block adjacency matrix with all zero blocks, then links
           edges between attack nodes and shadow nodes if the feature distance
           is less than a threshold.
        3. Trains a new GCN on this combined graph and evaluates fidelity & accuracy.

        Side effects: prints the best metrics and stores the trained model
        as ``self.net2``.

        Raises
        ------
        ValueError
            If fewer than 60 distinct target nodes are available, or if the
            sampled shadow nodes have no edges to derive a threshold from.
        RuntimeError, Exception
            Re-raised after being printed and after the CUDA cache is cleared.
        """
        attack_node_count = 60
        distance_sample_rows = 100
        test_start = 300
        try:
            torch.cuda.empty_cache()
            g_numpy = self.graph.adjacency_matrix().to_dense().cpu().numpy()

            current_dir = os.path.dirname(os.path.abspath(__file__))
            shadow_graph_dataset_path = os.path.join(current_dir, 'data', 'attack3_shadow_graph',
                                                     self.dataset.__class__.__name__.lower())
            sub_graph_index_b = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'target_graph_index.txt'))
            sub_graph_index_a = self._read_node_indices(
                os.path.join(shadow_graph_dataset_path, 'protential_1200_shadow_graph_index.txt'))

            # Sampling without replacement cannot loop forever when the
            # target set holds fewer distinct nodes than requested.
            candidates = list(dict.fromkeys(sub_graph_index_b))
            if attack_node_count > len(candidates):
                raise ValueError(
                    f"Cannot draw {attack_node_count} distinct attack nodes "
                    f"from {len(candidates)} candidate nodes")
            attack_node = random.sample(candidates, attack_node_count)

            attack_features = self.features[attack_node].cpu().numpy()
            attack_labels = self.labels[attack_node].cpu().numpy()
            shadow_features = self.features[sub_graph_index_a].cpu().numpy()
            shadow_labels = self.labels[sub_graph_index_a].cpu().numpy()

            # Attack nodes start with no edges; shadow nodes keep their own.
            sub_graph_g_a = np.array(g_numpy[sub_graph_index_a][:, sub_graph_index_a])
            num_attack = len(attack_node)
            num_shadow = len(sub_graph_index_a)
            generated_graph = np.block([
                [np.zeros((num_attack, num_attack)), np.zeros((num_attack, num_shadow))],
                [np.zeros((num_shadow, num_attack)), sub_graph_g_a]
            ])

            # Feature distances along the edges of the first shadow rows give
            # the linking thresholds.
            distance = [
                float(np.linalg.norm(shadow_features[row] - shadow_features[int(column)]))
                for row in range(min(distance_sample_rows, num_shadow))
                for column in np.nonzero(sub_graph_g_a[row])[0].tolist()
            ]
            if not distance:
                raise ValueError(
                    "Cannot derive a distance threshold: the sampled shadow nodes have no edges")
            threshold = np.mean(distance)
            max_threshold = max(distance)

            generated_features = np.vstack((attack_features, shadow_features))
            generated_labels = np.concatenate([attack_labels, shadow_labels])
            self._link_attack_nodes(generated_graph, generated_features, num_attack,
                                    num_shadow, threshold, max_threshold)

            generated_features = torch.FloatTensor(generated_features).to(self.device)
            generated_labels = torch.LongTensor(generated_labels).to(self.device)
            generated_train_mask = torch.ones(len(generated_features), dtype=torch.bool, device=self.device)
            generated_g = self._build_normalized_graph(generated_graph)

            # Evaluation sub-graph induced by the target index list; every
            # position from 300 onwards is a test node.
            sub_graph_g_b = np.array(g_numpy[sub_graph_index_b][:, sub_graph_index_b])
            sub_graph_features_b = self.features[sub_graph_index_b].to(self.device)
            sub_graph_labels_b = self.labels[sub_graph_index_b].to(self.device)
            sub_graph_test_mask_b = self.test_mask[sub_graph_index_b].to(self.device)
            sub_graph_test_mask_b[:test_start] = 0
            sub_graph_test_mask_b[test_start:] = 1
            sub_g_b = self._build_normalized_graph(sub_graph_g_b)

            # Labels the target model assigns on the evaluation sub-graph.
            self.net1.eval()
            with torch.no_grad():
                logits_b = self.net1(sub_g_b, sub_graph_features_b)
                _, query_b = torch.max(logits_b, dim=1)

            net2 = GCN(self.num_features, self.num_classes).to(self.device)
            optimizer_a = torch.optim.Adam(net2.parameters(), lr=1e-2, weight_decay=5e-4)
            best_performance_metrics = GraphNeuralNetworkMetric()

            print("=========Model Extracting==========================")
            for epoch in tqdm(range(300)):
                net2.train()
                logits_a = net2(generated_g, generated_features)
                logp_a = F.log_softmax(logits_a, 1)
                loss_a = F.nll_loss(logp_a[generated_train_mask],
                                    generated_labels[generated_train_mask])

                optimizer_a.zero_grad()
                loss_a.backward()
                optimizer_a.step()

                with torch.no_grad():
                    focus_gnn_metrics = GraphNeuralNetworkMetric(
                        0, 0, net2, sub_g_b, sub_graph_features_b,
                        sub_graph_test_mask_b, sub_graph_labels_b, query_b
                    )
                    focus_gnn_metrics.evaluate()
                    best_performance_metrics.fidelity = max(
                        best_performance_metrics.fidelity, focus_gnn_metrics.fidelity)
                    best_performance_metrics.accuracy = max(
                        best_performance_metrics.accuracy, focus_gnn_metrics.accuracy)

                if epoch % 10 == 0:
                    torch.cuda.empty_cache()

            print("========================Final results:=========================================")
            print(best_performance_metrics)
            # Keep the extracted model, as attacks 0 and 1 do.
            self.net2 = net2
        except RuntimeError as e:
            print(f"Runtime error: {e}")
            torch.cuda.empty_cache()
            raise
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            print(f"Error type: {type(e)}")
            torch.cuda.empty_cache()
            raise