Source code for stellargraph.data.loader

# -*- coding: utf-8 -*-
#
# Copyright 2018-2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import warnings
import pandas as pd

import networkx as nx
from stellargraph.data.epgm import EPGM
from stellargraph.core.graph import *
from stellargraph import globalvar


def from_epgm(epgm_location, dataset_name=None, directed=False):
    """
    Imports a graph stored in EPGM format to a NetworkX object

    Args:
        epgm_location (str): The directory containing the EPGM data
        dataset_name (str), optional: The name of the dataset to import. If it
            is not given (or is empty), the label of the first graph head is
            used and a ``RuntimeWarning`` is issued.
        directed (bool): If True, load as a directed graph, otherwise
            load as an undirected graph

    Returns:
        A NetworkX graph containing the data for the EPGM-stored graph.

    Raises:
        ValueError: If no graph head in the EPGM data is labelled
            ``dataset_name``.
    """
    G_epgm = EPGM(epgm_location)
    graphs = G_epgm.G["graphs"]

    # if dataset_name is not given, use the name of the 1st graph head
    if not dataset_name:
        dataset_name = graphs[0]["meta"]["label"]
        warnings.warn(
            "dataset name not specified, using dataset '{}' in the 1st graph head".format(
                dataset_name
            ),
            RuntimeWarning,
        )

    # Select graph using dataset_name
    matching_ids = [g["id"] for g in graphs if g["meta"]["label"] == dataset_name]
    if not matching_ids:
        # Without this check the lookup below would fail with an opaque
        # UnboundLocalError; tell the caller what is actually available.
        raise ValueError(
            "dataset '{}' not found in EPGM data at '{}'; available datasets: {}".format(
                dataset_name, epgm_location, [g["meta"]["label"] for g in graphs]
            )
        )
    # If several graph heads share the label, the last one wins (this matches
    # the behaviour of the previous un-broken loop).
    graph_id = matching_ids[-1]

    # Convert the selected EPGM graph to a NetworkX graph
    Gnx = G_epgm.to_nx(graph_id, directed=directed)

    print(
        "Graph statistics: {} nodes, {} edges".format(
            Gnx.number_of_nodes(), Gnx.number_of_edges()
        )
    )
    return Gnx


def load_dataset_BlogCatalog3(location):
    """
    This method loads the BlogCatalog3 network dataset (http://socialcomputing.asu.edu/datasets/BlogCatalog3)
    into a networkx undirected heterogeneous graph.

    .. deprecated::
        Use ``stellargraph.datasets.BlogCatalog3().load()`` instead. Calling this
        function issues a ``DeprecationWarning``.

    The graph has two types of nodes, 'user' and 'group', and two types of edges, 'friend' and 'belongs'.
    The 'friend' edges connect two 'user' nodes and the 'belongs' edges connect 'user' and 'group' nodes.

    The node and edge types are not included in the dataset, which is a collection of node and group ids along with
    the list of edges in the graph.

    Important note about the node IDs: The dataset uses integers for node ids. However, the integers from 1 to 39 are
    used as IDs for both users and groups. This would cause confusion when constructing the networkx graph object.
    As a result, we convert all IDs to string and append the character 'u' to the integer ID for user nodes and the
    character 'g' to the integer ID for group nodes.

    Args:
        location: <str> The directory where the dataset is located; a leading ``~`` is expanded
            to the user's home directory.

    Returns:
        A networkx Graph object.

    """
    # stacklevel=2 attributes the warning to the caller of this function rather
    # than to this module, so it is reported against the code that needs to be
    # updated (and is shown by Python's default filters when called from __main__).
    warnings.warn(
        "load_dataset_BlogCatalog3 has been replaced by `BlogCatalog3().load()`",
        DeprecationWarning,
        stacklevel=2,
    )
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from stellargraph.datasets import BlogCatalog3

    location = os.path.expanduser(location)
    return BlogCatalog3._load_from_location(location).to_networkx(feature_name=None)