Research on graph neural networks is closely related to graph embedding (also called network embedding), another topic of growing interest in the data mining and machine learning communities. Graph embedding aims to represent the vertices of a graph as low-dimensional vectors that preserve the graph's topology and node content, so that simple machine learning algorithms (for example, a support vector machine classifier) can be applied. Most graph embedding algorithms are unsupervised and can be roughly divided into three categories: matrix factorization, random walk, and deep learning methods. The deep learning methods for graph embedding also belong to the family of graph neural networks, including algorithms based on graph autoencoders (such as DNGR and SDNE) and graph convolutional networks trained without supervision (such as GraphSAGE). — https://zhuanlan.zhihu.com/p/75307407
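As a concrete illustration of the random-walk category, here is a minimal DeepWalk-style sketch: sample short random walks from a graph, then train a skip-gram model on the walk sequences so that nodes appearing in similar contexts get similar vectors. This snippet is not part of the referenced demo; the toy graph, walk length, and embedding size are arbitrary choices, and it assumes networkx and gensim >= 4.0 are available.

```python
import random
import networkx as nx
from gensim.models import Word2Vec  # assumes gensim >= 4.0

# A toy graph (hypothetical example, not the demo's dataset)
G = nx.karate_club_graph()

def random_walk(graph, start, length):
    """Uniform random walk of fixed length, returned as string node ids."""
    walk = [start]
    for _ in range(length - 1):
        neighbors = list(graph.neighbors(walk[-1]))
        if not neighbors:
            break
        walk.append(random.choice(neighbors))
    return [str(n) for n in walk]

# Generate several walks per node and train a skip-gram model on them
walks = [random_walk(G, n, 10) for n in G.nodes() for _ in range(20)]
model = Word2Vec(sentences=walks, vector_size=32, window=5, min_count=0, sg=1)

# Each node now has a 32-dimensional embedding that a simple classifier
# (e.g., an SVM) can consume directly
vec = model.wv[str(0)]
```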
Below is a TensorFlow implementation of a graph neural network; the code is adapted from https://github.com/Ivan0131/gnn_demo.
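The demo follows the original GNN model of Scarselli et al., in which each node $v$ carries a state vector $x_v$ that is updated iteratively from its neighbors until an approximate fixed point is reached, and an output network then maps the converged state to a prediction. Roughly, in the notation of the code below ($f_w$ is the state-transition network, $g_w$ the output network, and the sparse matrix ArcNode sums per-arc contributions into per-node states):

$$x_v^{(t+1)} = \sum_{u \in \mathrm{ne}(v)} f_w\big(l_u,\, l_v,\, x_u^{(t)}\big), \qquad o_v = g_w\big(x_v^{(T)}\big),$$

where $l_u, l_v$ are the labels attached to the arc's endpoints, and the iteration stops at step $T$ once the state change $\|x^{(t)} - x^{(t-1)}\|$ falls below a threshold (state_threshold) for every node or $t$ reaches max_iter.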
```python
import tensorflow.compat.v1 as tf
import numpy as np
import gnn.gnn_utils as gnn_utils

tf.disable_v2_behavior()

data_path = "./data"
set_name = "sub_15_7_200"

# Training set
inp, arcnode, nodegraph, nodein, labels, _ = gnn_utils.set_load_general(
    data_path, "train", set_name=set_name)
inp = [a[:, 1:] for a in inp]

# Validation set
inp_val, arcnode_val, nodegraph_val, nodein_val, labels_val, _ = gnn_utils.set_load_general(
    data_path, "validation", set_name=set_name)
inp_val = [a[:, 1:] for a in inp_val]

input_dim = len(inp[0][0])
state_dim = 10
output_dim = 2
state_threshold = 0.001  # convergence threshold on the state change
max_iter = 50            # cap on the number of fixed-point iterations

tf.reset_default_graph()

comp_inp = tf.placeholder(tf.float32, shape=(None, input_dim), name="input")
y = tf.placeholder(tf.float32, shape=(None, output_dim), name="target")
state = tf.placeholder(tf.float32, shape=(None, state_dim), name="state")
state_old = tf.placeholder(tf.float32, shape=(None, state_dim), name="old_state")
ArcNode = tf.sparse_placeholder(tf.float32, name="ArcNode")


def f_w(inp):
    # State-transition network: maps (arc labels, child state) to a state contribution
    with tf.variable_scope('State_net'):
        layer1 = tf.layers.dense(inp, 5, activation=tf.nn.sigmoid)
        layer2 = tf.layers.dense(layer1, state_dim, activation=tf.nn.sigmoid)
        return layer2


def g_w(inp):
    # Output network: maps the converged node state to class logits
    with tf.variable_scope('Output_net'):
        layer1 = tf.layers.dense(inp, 5, activation=tf.nn.sigmoid)
        layer2 = tf.layers.dense(layer1, output_dim, activation=None)
        return layer2


def convergence(a, state, old_state, k):
    with tf.variable_scope('Convergence'):
        # Assign the current state to the old state
        old_state = state

        # Gather the child (neighboring) nodes' states from the previous step
        gat = tf.gather(old_state, tf.cast(a[:, 0], tf.int32))

        # Drop the first column (the child node id), keeping only the labels
        # of the node and its neighbor
        sl = a[:, 1:]

        # Append the child node's previous state to the arc labels
        inp = tf.concat([sl, gat], axis=1)

        # Evaluate each child's contribution to its parent's state, then
        # multiply by the arc-node conversion matrix to aggregate the
        # contributions into the per-node states of the current step
        layer1 = f_w(inp)
        state = tf.sparse_tensor_dense_matmul(ArcNode, layer1)

        # Update the iteration counter
        k = k + 1
    return a, state, old_state, k


def condition(a, state, old_state, k):
    # Evaluate the stopping condition of the fixed-point iteration
    with tf.variable_scope('condition'):
        # Check whether the Euclidean distance between state(t) and
        # state(t-1) still exceeds the threshold for any node
        outDistance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(state, old_state)), 1) + 1e-10)
        checkDistanceVec = tf.greater(outDistance, state_threshold)
        c1 = tf.reduce_any(checkDistanceVec)

        # Check whether the maximum number of iterations has been reached
        c2 = tf.less(k, max_iter)
    return tf.logical_and(c1, c2)


# Iterate the state update until the state stabilizes
with tf.variable_scope('Loop'):
    k = tf.constant(0)
    res, st, old_st, num = tf.while_loop(condition, convergence,
                                         [comp_inp, state, state_old, k])

out = g_w(st)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=out))
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y, 1)), dtype=tf.float32))

optimizer = tf.train.AdamOptimizer(0.001)
grads = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads, name='train_op')

# Model training
num_epoch = 5000

# Placeholder feeds for the training set
arcnode_train = tf.SparseTensorValue(indices=arcnode[0].indices,
                                     values=arcnode[0].values,
                                     dense_shape=arcnode[0].dense_shape)
fd_train = {comp_inp: inp[0],
            state: np.zeros((arcnode[0].dense_shape[0], state_dim)),
            state_old: np.ones((arcnode[0].dense_shape[0], state_dim)),
            ArcNode: arcnode_train,
            y: labels}

# Placeholder feeds for the validation set
arcnode_valid = tf.SparseTensorValue(indices=arcnode_val[0].indices,
                                     values=arcnode_val[0].values,
                                     dense_shape=arcnode_val[0].dense_shape)
fd_valid = {comp_inp: inp_val[0],
            state: np.zeros((arcnode_val[0].dense_shape[0], state_dim)),
            state_old: np.ones((arcnode_val[0].dense_shape[0], state_dim)),
            ArcNode: arcnode_valid,
            y: labels_val}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    for i in range(0, num_epoch):
        _, loss_val, accuracy_val = sess.run([train_op, loss, accuracy], feed_dict=fd_train)

        if i % 100 == 0:
            loss_valid_val, accuracy_valid_val = sess.run([loss, accuracy], feed_dict=fd_valid)
            print("iter %s\t training loss: %s,\t training accuracy: %s,\t"
                  " validation loss: %s,\t validation accuracy: %s"
                  % (i, loss_val, accuracy_val, loss_valid_val, accuracy_valid_val))
```
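Once training finishes, per-node predictions can be read off the `out` tensor. A minimal sketch, to be run inside the same `tf.Session` block after the training loop (`logits` and `pred` are hypothetical variable names):

```python
    # Inside the tf.Session block, after the training loop:
    logits = sess.run(out, feed_dict=fd_valid)  # raw class scores per validation node
    pred = np.argmax(logits, axis=1)            # predicted class index per node
```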
Dataset and reference code: https://github.com/Ivan0131/gnn_demo