# -*- coding: utf-8 -*-
#
# Copyright 2018-2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
attri2vec
"""
__all__ = ["Attri2Vec"]
from tensorflow.keras import Input
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, Reshape, Embedding
import warnings
from .misc import deprecated_model_function
from ..mapper import Attri2VecLinkGenerator, Attri2VecNodeGenerator
def _require_without_generator(value, name):
if value is not None:
return value
else:
raise ValueError(
f"{name}: expected a value for 'input_dim', 'node_num' and 'multiplicity' when "
f"'generator' is not provided, found {name}=None."
)
class Attri2Vec:
    """
    Implementation of the attri2vec algorithm of Zhang et al. with Keras layers.
    see: https://arxiv.org/abs/1901.04095.

    The model minimally requires specification of the layer sizes as a list of int
    corresponding to the feature dimensions for each hidden layer and a generator object.

    .. seealso::

       Examples using Attri2Vec:

       - `node classification <https://stellargraph.readthedocs.io/en/stable/demos/node-classification/attri2vec-node-classification.html>`__
       - `link prediction <https://stellargraph.readthedocs.io/en/stable/demos/link-prediction/attri2vec-link-prediction.html>`__
       - `unsupervised representation learning <https://stellargraph.readthedocs.io/en/stable/demos/embeddings/attri2vec-embeddings.html>`__
       - `comparison of link prediction algorithms <https://stellargraph.readthedocs.io/en/stable/demos/link-prediction/homogeneous-comparison-link-prediction.html>`__

       Appropriate data generators: :class:`.Attri2VecNodeGenerator`, :class:`.Attri2VecLinkGenerator`.

    Args:
        layer_sizes (list): Hidden feature dimensions for each layer. Any sequence of int is
            accepted; it is copied into a list internally.
        generator (Sequence): A NodeSequence or LinkSequence.
        bias (bool): If True a bias vector is learnt for each layer in the attri2vec model, default to False.
        activation (str): The activation function of each layer in the attri2vec model, which takes values from ``linear``, ``relu`` and ``sigmoid`` (default).
        normalize ("l2" or None): The normalization used after each layer, default to None.
        input_dim (int, optional): The dimensions of the node features used as input to the model.
        node_num (int, optional): The number of nodes in the given graph.
        multiplicity (int, optional): The number of nodes to process at a time. This is 1 for a node
            inference and 2 for link inference (currently no others are supported).

    .. note::
        The values for ``input_dim``, ``node_num``, and ``multiplicity`` are obtained from the
        provided ``generator`` by default. The additional keyword arguments for these parameters
        provide an alternative way to specify them if a generator cannot be supplied.

    Raises:
        ValueError: If ``activation`` or ``normalize`` is not one of the supported values, if a
            size parameter is missing when no generator is given, or if ``layer_sizes`` is empty
            while an output embedding is needed (``multiplicity`` other than 1).
    """

    def __init__(
        self,
        layer_sizes,
        generator=None,
        bias=False,
        activation="sigmoid",
        normalize=None,
        input_dim=None,
        node_num=None,
        multiplicity=None,
    ):
        if activation not in ("linear", "relu", "sigmoid"):
            raise ValueError(
                "Activation should be either 'linear', 'relu' or 'sigmoid'; received '{}'".format(
                    activation
                )
            )
        self.activation = activation

        if normalize == "l2":
            self._normalization = Lambda(lambda x: K.l2_normalize(x, axis=-1))
        elif normalize is None:
            # Identity layer, so __call__ can apply the normalization unconditionally
            self._normalization = Lambda(lambda x: x)
        else:
            raise ValueError(
                "Normalization should be either 'l2' or None; received '{}'".format(
                    normalize
                )
            )

        # Get the model parameters from the generator or the keyword arguments
        if generator is not None:
            self._get_sizes_from_generator(generator)
        else:
            self.input_node_num = _require_without_generator(node_num, "node_num")
            self.input_feature_size = _require_without_generator(input_dim, "input_dim")
            self.multiplicity = _require_without_generator(multiplicity, "multiplicity")

        # Work on a list copy: the concatenation below requires a list, and copying
        # lets callers pass a tuple (or any other sequence) of sizes.
        layer_sizes = list(layer_sizes)

        # Model parameters
        self.n_layers = len(layer_sizes)
        self.bias = bias

        # Feature dimensions for each layer
        self.dims = [self.input_feature_size] + layer_sizes

        # store the trainable layers
        self._layers = [
            Dense(layer_size, activation=self.activation, use_bias=self.bias)
            for layer_size in layer_sizes
        ]

        if self.multiplicity == 1:
            # Node-only models have no target-node embedding table
            self._output_embedding = None
        else:
            # The embedding width is the last hidden size, so at least one layer is needed.
            if not layer_sizes:
                raise ValueError(
                    "layer_sizes: expected at least one layer size to build the output embedding, "
                    "found an empty sequence."
                )
            self._output_embedding = Embedding(
                self.input_node_num,
                layer_sizes[-1],
                input_length=1,
                name="output_embedding",
            )

    def _get_sizes_from_generator(self, generator):
        """
        Sets multiplicity, input_node_num and input_feature_size from the generator.

        Args:
            generator: The supplied generator.

        Raises:
            TypeError: If ``generator`` is not an Attri2VecNodeGenerator or Attri2VecLinkGenerator.
            RuntimeError: If the generator's graph reports more than one node type.
        """
        if not isinstance(generator, (Attri2VecNodeGenerator, Attri2VecLinkGenerator)):
            raise TypeError(
                "Generator should be an instance of Attri2VecNodeGenerator or Attri2VecLinkGenerator"
            )

        self.multiplicity = generator.multiplicity
        self.input_node_num = generator.graph.number_of_nodes()

        feature_sizes = generator.graph.node_feature_sizes()
        if len(feature_sizes) > 1:
            raise RuntimeError(
                "Attri2Vec called on graph with more than one node type."
            )

        # Take the feature size of the single remaining node type
        self.input_feature_size = feature_sizes.popitem()[1]

    def __call__(self, xin):
        """
        Construct node representations from node attributes through deep neural network

        Args:
            xin (Keras Tensor): Batch input features

        Returns:
            Output tensor
        """
        # Form Attri2Vec layers iteratively, normalizing after every layer
        h_layer = xin
        for layer in self._layers:
            h_layer = self._normalization(layer(h_layer))
        return h_layer

    def _node_model(self):
        """
        Builds a Attri2Vec model for node representation prediction.

        Returns:
            tuple: ``(x_inp, x_out)`` where ``x_inp`` is a Keras input tensor
            for the Attri2Vec model and ``x_out`` is the Keras tensor
            for the Attri2Vec model output.
        """
        # Create tensor inputs
        x_inp = Input(shape=(self.input_feature_size,))

        # Output from Attri2Vec model
        x_out = self(x_inp)

        return x_inp, x_out

    def _link_model(self):
        """
        Builds a Attri2Vec model for context node prediction.

        Returns:
            tuple: (x_inp, x_out) where ``x_inp`` is a list of Keras input tensors for (src, dst) nodes in the node pairs
            and ``x_out`` is a list of output tensors for (src, dst) nodes in the node pairs

        Raises:
            RuntimeError: If the model has no output embedding, which is the case when it was
                constructed with a multiplicity of 1.
        """
        # Explicit check rather than ``assert``: assertions are removed under ``python -O``,
        # which would turn this into an opaque "'NoneType' object is not callable" error.
        if not isinstance(self._output_embedding, Embedding):
            raise RuntimeError(
                "The link model requires an output embedding, but none was created because this "
                "model was constructed with multiplicity=1."
            )

        # Expose input and output sockets of the model, for source node:
        x_inp_src, x_out_src = self._node_model()

        # Expose input and out sockets of the model, for target node:
        x_inp_dst = Input(shape=(1,))
        x_out_dst = self._output_embedding(x_inp_dst)
        # Drop the length-1 axis added by the embedding lookup
        x_out_dst = Reshape((self.dims[self.n_layers],))(x_out_dst)

        x_inp = [x_inp_src, x_inp_dst]
        x_out = [x_out_src, x_out_dst]
        return x_inp, x_out

    def in_out_tensors(self, multiplicity=None):
        """
        Builds a Attri2Vec model for node or link/node pair prediction, depending on the generator used to construct
        the model (whether it is a node or link/node pair generator).

        Args:
            multiplicity (int, optional): 1 to build the node model or 2 to build the link/node
                pair model. Defaults to the multiplicity the model was constructed with.

        Returns:
            tuple: ``(x_inp, x_out)``, where ``x_inp`` is a list of Keras input tensors
            for the specified Attri2Vec model (either node or link/node pair model) and ``x_out`` contains
            model output tensor(s) of shape ``(batch_size, layer_sizes[-1])``

        Raises:
            RuntimeError: If the multiplicity is neither 1 nor 2.
        """
        if multiplicity is None:
            multiplicity = self.multiplicity

        if multiplicity == 1:
            return self._node_model()
        elif multiplicity == 2:
            return self._link_model()
        else:
            raise RuntimeError(
                "Currently only multiplicities of 1 and 2 are supported. Consider using node_model or "
                "link_model method explicitly to build node or link prediction model, respectively."
            )

    def default_model(self, flatten_output=True):
        """
        Deprecated alias of :meth:`in_out_tensors`.

        Args:
            flatten_output: Ignored; accepted only so that existing callers keep working.

        Returns:
            tuple: The result of ``in_out_tensors()`` with the model's own multiplicity.
        """
        warnings.warn(
            "The .default_model() method is deprecated. Please use .in_out_tensors() method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.in_out_tensors()

    # Deprecated public names, wrapped by the shared deprecation helper
    node_model = deprecated_model_function(_node_model, "node_model")
    link_model = deprecated_model_function(_link_model, "link_model")
    build = deprecated_model_function(in_out_tensors, "build")