Source code for stellargraph.layer.knowledge_graph

# -*- coding: utf-8 -*-
#
# Copyright 2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import activations, initializers, constraints, regularizers
from tensorflow.keras.layers import Input, Layer, Lambda, Dropout, Reshape, Embedding

from ..mapper.knowledge_graph import KGTripleGenerator
from ..core.experimental import experimental


class ComplExScore(Layer):
    """
    Keras layer that computes the ComplEx score of (subject, relation, object) triples.

    Original Paper: Complex Embeddings for Simple Link Prediction, Théo Trouillon, Johannes Welbl,
    Sebastian Riedel, Éric Gaussier and Guillaume Bouchard, ICML 2016.
    http://jmlr.org/proceedings/papers/v48/trouillon16.pdf

    The layer holds no weights of its own: it only combines the real and imaginary parts of the
    subject, relation and object embeddings into one real-valued score of the likelihood of the
    link.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def build(self, input_shape):
        """
        Mark the layer as built; there are no weights to create.

        Args:
            input_shape: shapes of the inputs (unused).
        """
        self.built = True

    def call(self, inputs):
        """
        Applies the layer.

        Args:
            inputs: a list of 6 tensors, where the three consecutive pairs represent real and
                imaginary parts of the subject, relation and object embeddings, respectively,
                that is, ``inputs == [Re(subject), Im(subject), Re(relation), ...]``. The
                element-wise products are summed over axis 2, so every tensor needs at least
                three axes with the embedding dimension k on axis 2 (for example
                batch size x 1 x k).

        Returns:
            A tensor of scores: the inputs' common (broadcast) shape with axis 2 summed away.
        """
        subj_real, subj_imag, rel_real, rel_imag, obj_real, obj_imag = inputs

        def trilinear(rel, subj, obj):
            # element-wise triple product, summed along the embedding axis
            return tf.reduce_sum(rel * subj * obj, axis=2)

        # The four real-valued terms of Re(<w_r, e_s, conjugate(e_o)>)
        all_real = trilinear(rel_real, subj_real, obj_real)
        real_rel_imag_nodes = trilinear(rel_real, subj_imag, obj_imag)
        imag_rel_imag_obj = trilinear(rel_imag, subj_real, obj_imag)
        imag_rel_imag_subj = trilinear(rel_imag, subj_imag, obj_real)

        return all_real + real_rel_imag_nodes + imag_rel_imag_obj - imag_rel_imag_subj
@experimental(
    reason="results from the reference paper have not been reproduced yet", issues=[862]
)
class ComplEx:
    """
    Embedding layers and a ComplEx scoring layer that implement the ComplEx knowledge graph
    embedding algorithm as in http://jmlr.org/proceedings/papers/v48/trouillon16.pdf

    Args:
        generator (KGTripleGenerator): A generator of triples to feed into the model.

        k (int): the dimension of the embedding (that is, a vector in C^k is learnt for each node
            and each link type)

        embeddings_initializer (str or func, optional): The initialiser to use for the embeddings
            (the default of random normal values matches the paper's reference implementation).

        embeddings_regularizer (str or func, optional): The regularizer to use for the embeddings.

    Raises:
        TypeError: if ``generator`` is not a ``KGTripleGenerator``.
    """

    def __init__(
        self,
        generator,
        k,
        embeddings_initializer="normal",
        embeddings_regularizer=None,
    ):
        if not isinstance(generator, KGTripleGenerator):
            raise TypeError(
                f"generator: expected KGTripleGenerator, found {type(generator).__name__}"
            )

        # The graph behind the generator fixes the sizes of the two embedding tables.
        graph = generator.G
        self.num_nodes = graph.number_of_nodes()
        self.num_edge_types = len(graph._edges.types)

        self.k = k
        # Resolve string identifiers (or pass through callables/None) via Keras.
        self.embeddings_initializer = initializers.get(embeddings_initializer)
        self.embeddings_regularizer = regularizers.get(embeddings_regularizer)

    # layer names: ``embeddings`` uses these to find the embedding layers in a built model
    _NODE_REAL = "COMPLEX_NODE_REAL"
    _NODE_IMAG = "COMPLEX_NODE_IMAG"

    _REL_REAL = "COMPLEX_EDGE_TYPE_REAL"
    _REL_IMAG = "COMPLEX_EDGE_TYPE_IMAG"

    @staticmethod
    def embeddings(model):
        """
        Retrieve the embeddings for nodes/entities and edge types/relations in the given model.

        Args:
            model (tensorflow.keras.Model): a Keras model created using a ``ComplEx`` instance.

        Returns:
            A tuple of numpy complex arrays: the first element is the embeddings for nodes/entities
            (``shape = number of nodes × k``), the second element is the embeddings for edge types/relations
            (``shape = number of edge types x k``).
        """
        # Start from the imaginary part (scaled by 1j) and add the real part in place.
        node = 1j * model.get_layer(ComplEx._NODE_IMAG).embeddings.numpy()
        node += model.get_layer(ComplEx._NODE_REAL).embeddings.numpy()

        rel = 1j * model.get_layer(ComplEx._REL_IMAG).embeddings.numpy()
        rel += model.get_layer(ComplEx._REL_REAL).embeddings.numpy()

        return node, rel

    def _embed(self, count, name):
        """
        Create a named ``Embedding`` layer with ``count`` rows of dimension ``k``, using the
        initializer and regularizer configured on this instance.
        """
        return Embedding(
            count,
            self.k,
            name=name,
            embeddings_initializer=self.embeddings_initializer,
            embeddings_regularizer=self.embeddings_regularizer,
        )

    def __call__(self, x):
        """
        Apply embedding layers to the source, relation and object input "ilocs" (sequential integer
        labels for the nodes and edge types).

        Args:
            x (list): list of 3 tensors (each batch size x 1) storing the ilocs of the subject,
                relation and object elements for each edge in the batch.

        Returns:
            The output tensor of a ``ComplExScore`` layer applied to the embedded inputs.
        """
        s_iloc, r_iloc, o_iloc = x

        # ComplEx generates embeddings in C, which we model as separate real and imaginary
        # embeddings
        node_embeddings_real = self._embed(self.num_nodes, self._NODE_REAL)
        node_embeddings_imag = self._embed(self.num_nodes, self._NODE_IMAG)
        edge_type_embeddings_real = self._embed(self.num_edge_types, self._REL_REAL)
        edge_type_embeddings_imag = self._embed(self.num_edge_types, self._REL_IMAG)

        # Subjects and objects are both nodes, so they share the node embedding layers.
        s_re = node_embeddings_real(s_iloc)
        s_im = node_embeddings_imag(s_iloc)

        r_re = edge_type_embeddings_real(r_iloc)
        r_im = edge_type_embeddings_imag(r_iloc)

        o_re = node_embeddings_real(o_iloc)
        o_im = node_embeddings_imag(o_iloc)

        scoring = ComplExScore()

        return scoring([s_re, s_im, r_re, r_im, o_re, o_im])

    def build(self):
        """
        Builds a ComplEx model.

        Returns:
            A tuple of (list of input tensors, tensor for ComplEx model score outputs)
        """
        # ``Input`` documents ``shape`` as a shape tuple; pass ``(1,)`` rather than a bare
        # integer so that the call does not depend on implicit int-to-shape conversion.
        s_iloc = Input(shape=(1,))
        r_iloc = Input(shape=(1,))
        o_iloc = Input(shape=(1,))

        x_inp = [s_iloc, r_iloc, o_iloc]
        x_out = self(x_inp)

        return x_inp, x_out