# Source code for stellargraph.layer.knowledge_graph

# -*- coding: utf-8 -*-
#
# Copyright 2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import activations, initializers, constraints, regularizers
from tensorflow.keras.layers import Input, Layer, Lambda, Dropout, Reshape, Embedding

from ..mapper.knowledge_graph import KGTripleGenerator
from ..core.experimental import experimental


class ComplExScore(Layer):
    """
    ComplEx scoring Keras layer.

    Original Paper: Complex Embeddings for Simple Link Prediction, Théo Trouillon, Johannes Welbl,
    Sebastian Riedel, Éric Gaussier and Guillaume Bouchard, ICML
    2016. http://jmlr.org/proceedings/papers/v48/trouillon16.pdf

    This combines subject, relation and object embeddings into a score of the likelihood of the
    link.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def build(self, input_shape):
        """
        Marks the layer as built. This layer defines no weights of its own, so there is nothing
        to create here.

        Args:
            input_shape: the shapes of the inputs (unused).
        """
        self.built = True

    def call(self, inputs):
        """
        Applies the layer.

        Args:

            inputs: a list of 6 tensors of identical shape whose last axis is the embedding
                dimension k (for example ``batch size x 1 x k``, as produced by the embedding
                layers in :class:`ComplEx`, or ``batch size x k``), where the three consecutive
                pairs represent real and imaginary parts of the subject, relation and object
                embeddings, respectively, that is, ``inputs == [Re(subject), Im(subject),
                Re(relation), ...]``

        Returns:
            A tensor of scores with the shape of the inputs minus the last (embedding) axis.
        """
        s_re, s_im, r_re, r_im, o_re, o_im = inputs

        def inner(r, s, o):
            # Sum over the trailing embedding axis. For the rank-3 tensors produced by
            # ``ComplEx`` this is the same as ``axis=2``, but it also handles rank-2 inputs.
            return tf.reduce_sum(r * s * o, axis=-1)

        # expansion of Re(<w_r, e_s, conjugate(e_o)>)
        score = (
            inner(r_re, s_re, o_re)
            + inner(r_re, s_im, o_im)
            + inner(r_im, s_re, o_im)
            - inner(r_im, s_im, o_re)
        )

        return score


@experimental(
    reason="results from the reference paper have not been reproduced yet", issues=[862]
)
class ComplEx:
    """
    Embedding layers and a ComplEx scoring layer that implement the ComplEx knowledge graph
    embedding algorithm as in http://jmlr.org/proceedings/papers/v48/trouillon16.pdf

    Args:
        generator (KGTripleGenerator): A generator of triples to feed into the model.

        k (int): the dimension of the embedding (that is, a vector in C^k is learnt for each node
            and each link type)

        embeddings_initializer (str or func, optional): The initialiser to use for the embeddings
            (the default of random normal values matches the paper's reference implementation).

        embeddings_regularizer (str or func, optional): The regularizer to use for the embeddings.

    Raises:
        TypeError: if ``generator`` is not a ``KGTripleGenerator``.
    """

    def __init__(
        self,
        generator,
        k,
        embeddings_initializer="normal",
        embeddings_regularizer=None,
    ):
        if not isinstance(generator, KGTripleGenerator):
            raise TypeError(
                f"generator: expected KGTripleGenerator, found {type(generator).__name__}"
            )

        graph = generator.G
        # The sizes of the two embedding tables: one row per node and one row per edge type.
        self.num_nodes = graph.number_of_nodes()
        self.num_edge_types = len(graph._edges.types)
        self.k = k
        # Resolve string identifiers (or None) into Keras initializer/regularizer objects.
        self.embeddings_initializer = initializers.get(embeddings_initializer)
        self.embeddings_regularizer = regularizers.get(embeddings_regularizer)

    # layer names
    _NODE_REAL = "COMPLEX_NODE_REAL"
    _NODE_IMAG = "COMPLEX_NODE_IMAG"

    _REL_REAL = "COMPLEX_EDGE_TYPE_REAL"
    _REL_IMAG = "COMPLEX_EDGE_TYPE_IMAG"

    @staticmethod
    def embeddings(model):
        """
        Retrieve the embeddings for nodes/entities and edge types/relations in the given model.

        The real and imaginary parts are looked up by the fixed layer names used when the model
        was created, and combined into complex-valued arrays.

        Args:
            model (tensorflow.keras.Model): a Keras model created using a ``ComplEx`` instance.

        Returns:
            A tuple of numpy complex arrays: the first element is the embeddings for nodes/entities
            (``shape = number of nodes x k``), the second element is the embeddings for edge
            types/relations (``shape = number of edge types x k``).
        """
        node = 1j * model.get_layer(ComplEx._NODE_IMAG).embeddings.numpy()
        node += model.get_layer(ComplEx._NODE_REAL).embeddings.numpy()

        rel = 1j * model.get_layer(ComplEx._REL_IMAG).embeddings.numpy()
        rel += model.get_layer(ComplEx._REL_REAL).embeddings.numpy()

        return node, rel

    def _embed(self, count, name):
        """
        Create a named embedding layer with ``count`` rows of dimension ``k``, using the
        initializer and regularizer configured on this instance.
        """
        return Embedding(
            count,
            self.k,
            name=name,
            embeddings_initializer=self.embeddings_initializer,
            embeddings_regularizer=self.embeddings_regularizer,
        )

    def __call__(self, x):
        """
        Apply embedding layers to the source, relation and object input "ilocs" (sequential integer
        labels for the nodes and edge types).

        Args:
            x (list): list of 3 tensors (each batch size x 1) storing the ilocs of the subject,
                relation and object elements for each edge in the batch.

        Returns:
            The tensor of scores computed by a ``ComplExScore`` layer from the embeddings.
        """
        s_iloc, r_iloc, o_iloc = x

        # ComplEx generates embeddings in C, which we model as separate real and imaginary
        # embeddings
        # NOTE(review): new embedding layers with the same fixed names are created on every
        # invocation, so this is presumably meant to be called once per model -- confirm.
        node_embeddings_real = self._embed(self.num_nodes, self._NODE_REAL)
        node_embeddings_imag = self._embed(self.num_nodes, self._NODE_IMAG)
        edge_type_embeddings_real = self._embed(self.num_edge_types, self._REL_REAL)
        edge_type_embeddings_imag = self._embed(self.num_edge_types, self._REL_IMAG)

        # subjects and objects are both nodes, so they share the node embedding layers
        s_re = node_embeddings_real(s_iloc)
        s_im = node_embeddings_imag(s_iloc)

        r_re = edge_type_embeddings_real(r_iloc)
        r_im = edge_type_embeddings_imag(r_iloc)

        o_re = node_embeddings_real(o_iloc)
        o_im = node_embeddings_imag(o_iloc)

        scoring = ComplExScore()

        return scoring([s_re, s_im, r_re, r_im, o_re, o_im])

    def build(self):
        """
        Builds a ComplEx model.

        Returns:
            A tuple of (list of input tensors, tensor for ComplEx model score outputs)
        """
        # One iloc per triple element; the shape is given as a tuple, as documented by Keras.
        s_iloc = Input(shape=(1,))
        r_iloc = Input(shape=(1,))
        o_iloc = Input(shape=(1,))

        x_inp = [s_iloc, r_iloc, o_iloc]
        x_out = self(x_inp)

        return x_inp, x_out