# -*- coding: utf-8 -*-
#
# Copyright 2019-2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
node2vec
"""
__all__ = ["Node2Vec"]
from tensorflow.keras import Input
from tensorflow.keras.layers import Reshape, Embedding
import math
from tensorflow import keras
import warnings
from .misc import deprecated_model_function
from ..mapper import Node2VecLinkGenerator, Node2VecNodeGenerator
def _require_without_generator(value, name):
if value is not None:
return value
else:
raise ValueError(
f"{name}: expected a value for 'node_num' and 'multiplicity' when "
f"'generator' is not provided, found {name}=None."
)
[docs]class Node2Vec:
"""
Implementation of the Node2Vec algorithm of A. Grover and J. Leskovec with Keras layers.
see: https://snap.stanford.edu/node2vec/
The model minimally requires specification of the embedding size and a generator object.
Args:
emb_size (int): The dimension of node embeddings.
generator (Sequence): A NodeSequence or LinkSequence.
node_num(int, optional): The number of nodes in the given graph.
multiplicity (int, optional): The number of nodes to process at a time. This is 1 for a node inference
and 2 for link inference (currently no others are supported).
"""
def __init__(self, emb_size, generator=None, node_num=None, multiplicity=None):
# Get the node_num from the generator if it is given
self.generator = generator
if generator is not None:
self._get_sizes_from_generator(generator)
else:
self.input_node_num = _require_without_generator(node_num, "node_num")
self.multiplicity = _require_without_generator(multiplicity, "multiplicity")
# Model parameters
self.emb_size = emb_size
# Initialise the target embedding layer: input-to-hidden
target_embedding_initializer = keras.initializers.RandomUniform(
minval=-1.0, maxval=1.0
)
self.target_embedding = Embedding(
self.input_node_num,
self.emb_size,
input_length=1,
name="target_embedding",
embeddings_initializer=target_embedding_initializer,
)
# Initialise the context embedding layer: hidden-to-output
context_embedding_initializer = keras.initializers.TruncatedNormal(
stddev=1.0 / math.sqrt(self.emb_size * 1.0)
)
self.context_embedding = Embedding(
self.input_node_num,
self.emb_size,
input_length=1,
name="context_embedding",
embeddings_initializer=context_embedding_initializer,
)
def _get_sizes_from_generator(self, generator):
"""
Sets node_num and multiplicity from the generator.
Args:
generator: The supplied generator.
"""
if not isinstance(generator, (Node2VecNodeGenerator, Node2VecLinkGenerator)):
raise TypeError(
"Generator should be an instance of Node2VecNodeGenerator or Node2VecLinkGenerator"
)
self.multiplicity = generator.multiplicity
self.input_node_num = generator.graph.number_of_nodes()
if len(list(generator.graph.node_types)) > 1:
raise ValueError("Node2Vec called on graph with more than one node type.")
def __call__(self, xin, embedding):
"""
Construct node representations from node ids through a look-up table.
Args:
xin (Keras Tensor): Batch input node ids.
embedding (str): "target" for target_embedding, "context" for context_embedding
Returns:
Output tensor.
"""
if embedding == "target":
h_layer = self.target_embedding(xin)
elif embedding == "context":
h_layer = self.context_embedding(xin)
else:
raise ValueError(
'wrong embedding argument is supplied: {}, should be "target" or "context"'.format(
embedding
)
)
h_layer = Reshape((self.emb_size,))(h_layer)
return h_layer
def _node_model(self, embedding="target"):
"""
Builds a Node2Vec model for node prediction.
Args:
embedding (str): "target" for target_embedding, "context" for context_embedding
Returns:
tuple: (x_inp, x_out) where ``x_inp`` is a Keras input tensor
for the Node2Vec model and ``x_out`` is the Keras tensor
for the Node2Vec model output.
"""
# Create tensor inputs
x_inp = Input(shape=(1,))
# Output from Node2Vec model
x_out = self(x_inp, embedding)
return x_inp, x_out
def _link_model(self):
"""
Builds a Node2Vec model for link or node pair prediction.
Returns:
tuple: (x_inp, x_out) where ``x_inp`` is a list of Keras input tensors for (src, dst) nodes in the node pairs
and ``x_out`` is a list of output tensors for (src, dst) nodes in the node pairs.
"""
# Expose input and output sockets of the model, for source node:
x_inp_src, x_out_src = self._node_model("target")
x_inp_dst, x_out_dst = self._node_model("context")
x_inp = [x_inp_src, x_inp_dst]
x_out = [x_out_src, x_out_dst]
return x_inp, x_out
[docs] def in_out_tensors(self, multiplicity=None):
"""
Builds a Node2Vec model for node or link/node pair prediction, depending on the generator used to construct
the model (whether it is a node or link/node pair generator).
Returns:
tuple: (x_inp, x_out), where ``x_inp`` contains Keras input tensor(s)
for the specified Node2Vec model (either node or link/node pair model) and ``x_out`` contains
model output tensor(s) of shape (batch_size, self.emb_size)
"""
if multiplicity is None:
multiplicity = self.multiplicity
if self.multiplicity == 1:
return self._node_model()
elif self.multiplicity == 2:
return self._link_model()
else:
raise ValueError("Currently only multiplicities of 1 and 2 are supported.")
def default_model(self, flatten_output=True):
warnings.warn(
"The .default_model() method is deprecated. Please use .in_out_tensors() method instead.",
DeprecationWarning,
stacklevel=2,
)
return self.build()
node_model = deprecated_model_function(_node_model, "node_model")
link_model = deprecated_model_function(_link_model, "link_model")
build = deprecated_model_function(in_out_tensors, "build")