1. Basic Operations
1.1 Loading the dataset (using KarateClub as an example)
import networkx as nx

G = nx.karate_club_graph()
print(type(G))

# Visualize the graph
nx.draw(G, with_labels=True)
1.2 Average node degree
def average_degree(num_edges, num_nodes):
    # The average degree of a node is 2*E/N
    avg_degree = round(2 * num_edges / num_nodes)
    return avg_degree

num_edges = G.number_of_edges()
num_nodes = G.number_of_nodes()
avg_degree = average_degree(num_edges, num_nodes)
print("Average degree of karate club network is {}".format(avg_degree))
1.3 Average clustering coefficient
def average_clustering_coefficient(G):
    # networkX's average_clustering does the work
    avg_cluster_coef = round(nx.average_clustering(G), 2)
    return avg_cluster_coef

avg_cluster_coef = average_clustering_coefficient(G)
print("Average clustering coefficient of karate club network is {}".format(avg_cluster_coef))
1.4 PageRank of node 0 after one iteration
# The importance of node j is the sum, over all nodes i pointing to j,
# of (importance of node i) / (out-degree of node i).
# With the random-surfer model, beta is the probability of following a link
# (and 1-beta the probability of teleporting), so the importance of node j becomes
#   beta * sum_i (importance of i / out-degree of i) + (1 - beta) / N
# Here we run only one iteration.
def one_iter_pagerank(G, beta, r0, node_id):
    r1 = 0
    # Iterate over all neighbors of the given node
    for neighbor in nx.neighbors(G, node_id):
        di = G.degree[neighbor]
        r1 += beta * r0 / di
    r1 += (1 - beta) * (1 / G.number_of_nodes())
    return r1

beta = 0.8
r0 = 1 / G.number_of_nodes()
node = 0
r1 = one_iter_pagerank(G, beta, r0, node)
print("The PageRank value for node 0 after one iteration is {}".format(r1))# 也可以调用nx.pagerank
r1 = nx.pagerank(G, alpha=beta)
print("The PageRank value for node 0 after one iteration is {}".format(r1))
1.5 Closeness centrality of node 5
# The closeness centrality of node u is 1 / (sum of shortest-path distances from u to every other node)
def closeness_centrality(G, node=5):
    # Use networkX's shortest_path_length to get the distances from the node
    shortest_path = nx.shortest_path_length(G, source=node)
    sum_length = 0
    for i in range(G.number_of_nodes()):
        sum_length += shortest_path[i]
    closeness = 1 / sum_length
    return closeness
node = 5
closeness = closeness_centrality(G, node=node)
print("The node 5 has closeness centrality {}".format(closeness))# 也可以直接调用networkX的closeness centrality
# 注意这个库函数做了规范化(乘了 (图节点数量-1) )
closeness = nx.closeness_centrality(G, u=5)
closeness = closeness / (G.number_of_nodes() - 1)
print("The node 5 has closeness centrality {}".format(closeness))
2. Converting the Graph to Tensors
2.1 Getting the graph's edge list and converting it to torch.LongTensor
import torch

# Collect all edges of the graph
def graph_to_edge_list(G):
    edge_list = []
    for edge in G.edges():
        edge_list.append(edge)
    return edge_list

# Convert an edge list to a [2, num_edges] LongTensor
def edge_list_to_tensor(edge_list):
    edge_index = torch.tensor(edge_list, dtype=torch.long).T
    return edge_index

pos_edge_list = graph_to_edge_list(G)
pos_edge_index = edge_list_to_tensor(pos_edge_list)
print("The pos_edge_index tensor has shape {}".format(pos_edge_index.shape))
2.2 Negative edge sampling: sample a number of "negative" edges from the graph (note: "negative" edges are edges that do not exist in the graph)
import random

def sample_negative_edges(G, num_neg_samples):
    neg_edge_list = []
    # For an undirected graph, nx.non_edges yields each absent node pair only once
    non_edges_one_side = list(nx.non_edges(G))
    neg_edge_list_indices = random.sample(range(0, len(non_edges_one_side)), num_neg_samples)
    for i in neg_edge_list_indices:
        neg_edge_list.append(non_edges_one_side[i])
    return neg_edge_list

# Sample 78 negative edges
neg_edge_list = sample_negative_edges(G, len(pos_edge_list))

# Transform the negative edge list to a tensor
neg_edge_index = edge_list_to_tensor(neg_edge_list)
print("The neg_edge_index tensor has shape {}".format(neg_edge_index.shape))
print("The neg_edge_index : {}".format(neg_edge_index))# Which of following edges can be negative ones?
edge_1 = (7, 1)
edge_2 = (1, 33)
edge_3 = (33, 22)
edge_4 = (0, 4)
edge_5 = (4, 2)
for name, edge in [('edge_1', edge_1), ('edge_2', edge_2), ('edge_3', edge_3),
                   ('edge_4', edge_4), ('edge_5', edge_5)]:
    print(name + (" can't" if G.has_edge(*edge) else ' can') + ' be a negative edge')
3. Node Embedding
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Initialize an embedding layer:
# each node gets one embedding, an 8-dimensional vector
emb_sample = nn.Embedding(num_embeddings=4, embedding_dim=8)
print('Sample embedding layer: {}'.format(emb_sample))

# The whole embedding acts as a matrix, one row per object;
# embedding.weight.data holds the values of that matrix

# Select rows by index
ids = torch.LongTensor([1, 3])
print(emb_sample(ids))

# Shape of the weight matrix
shape = emb_sample.weight.data.shape
print(shape)

# Assign new values
emb_sample.weight.data = torch.ones(shape)
print(emb_sample.weight.data)
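Under the hood, an embedding lookup is just row selection from the weight matrix; a small sketch to confirm:

# emb_sample(ids) returns exactly the rows of the weight matrix at those indices
ids = torch.LongTensor([1, 3])
print(torch.equal(emb_sample(ids), emb_sample.weight[ids]))  # True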
3.1 Creating and initializing the Embedding
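The training code in 3.3 calls a helper, create_node_emb, which builds the embedding for the whole graph: an nn.Embedding with one row per node. A minimal sketch, where the uniform torch.rand initialization is an assumption (any random init serves):

def create_node_emb(num_node=34, embedding_dim=16):
    # One row per node; initialize uniformly in [0, 1) (assumed init scheme)
    emb = nn.Embedding(num_embeddings=num_node, embedding_dim=embedding_dim)
    emb.weight.data = torch.rand(num_node, embedding_dim)
    return emb

emb = create_node_emb(G.number_of_nodes(), embedding_dim=16)
print(emb(torch.LongTensor([0, 3])))

With emb created here, the visualization in 3.2 can be run before training as well.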
3.2 Visualizing the Embedding
def visualize_emb(emb):
    # Convert the embedding matrix to numpy and project it to 2D with PCA
    X = emb.weight.data.numpy()
    pca = PCA(n_components=2)
    X = pca.fit_transform(X)
    plt.figure(figsize=(6, 6))
    club1_x = []
    club1_y = []
    club2_x = []
    club2_y = []
    # Split nodes by their 'club' attribute in the karate club graph
    for node in G.nodes(data=True):
        if node[1]['club'] == 'Mr. Hi':
            club1_x.append(X[node[0]][0])
            club1_y.append(X[node[0]][1])
        else:
            club2_x.append(X[node[0]][0])
            club2_y.append(X[node[0]][1])
    plt.scatter(club1_x, club1_y, color="red", label="Mr. Hi")
    plt.scatter(club2_x, club2_y, color="blue", label="Officer")
    plt.legend()
    plt.show()

visualize_emb(emb)
3.3 Training the Embedding
from torch.optim import SGD
import torch.nn as nn

def accuracy(pred, label):
    # Fraction of predictions on the correct side of 0.5
    accu = ((pred > 0.5) == label).sum().item() / pred.shape[0]
    return accu

def train(emb, loss_fn, sigmoid, train_label, train_edge):
    epochs = 1000
    learning_rate = 0.1
    optimizer = SGD(emb.parameters(), lr=learning_rate, momentum=0.9)
    for i in range(epochs):
        optimizer.zero_grad()
        # Look up the embeddings of both endpoints of every training edge
        train_node_emb = emb(train_edge)
        # Dot product between the two endpoint embeddings of each edge
        dot_product_result = train_node_emb[0].mul(train_node_emb[1])
        dot_product_result = torch.sum(dot_product_result, 1)
        # Squash the scores into (0, 1)
        sigmoid_result = sigmoid(dot_product_result)
        loss_result = loss_fn(sigmoid_result, train_label)
        if i % 50 == 0:
            print("Epoch {}: loss {:.4f}, accuracy {:.4f}".format(
                i, loss_result.item(), accuracy(sigmoid_result, train_label)))
        # Update
        loss_result.backward()
        optimizer.step()

loss_fn = nn.BCELoss()
sigmoid = nn.Sigmoid()

# Load the graph
G = nx.karate_club_graph()
num_node = G.number_of_nodes()
num_edge = G.number_of_edges()
# Initialize the embedding
emb = create_node_emb(num_node, embedding_dim=16)

# Read the edges of the graph
pos_edge_list = graph_to_edge_list(G)
# Convert to a tensor
pos_edge_index = edge_list_to_tensor(pos_edge_list)
neg_edge_list = sample_negative_edges(G, len(pos_edge_list))
neg_edge_index = edge_list_to_tensor(neg_edge_list)

# Generate the positive and negative labels
pos_label = torch.ones(pos_edge_index.shape[1], )
neg_label = torch.zeros(neg_edge_index.shape[1], )

# Concatenate positive and negative labels into one tensor
train_label = torch.cat([pos_label, neg_label], dim=0)

# Concatenate positive and negative edges into one tensor
# Since the network is very small, we do not split the edges into val/test sets
train_edge = torch.cat([pos_edge_index, neg_edge_index], dim=1)

train(emb, loss_fn, sigmoid, train_label, train_edge)
visualize_emb(emb)
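After training, the plot should show the two clubs roughly separated. To put a number on the final fit, the training edges can be re-scored once more (a sketch reusing the helpers defined above):

# Final training accuracy: score every training edge and threshold at 0.5
with torch.no_grad():
    final_emb = emb(train_edge)
    final_pred = sigmoid(torch.sum(final_emb[0] * final_emb[1], dim=1))
print("Final training accuracy: {:.3f}".format(accuracy(final_pred, train_label)))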