Source code for stellargraph.data.loader

# -*- coding: utf-8 -*-
#
# Copyright 2018-2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import warnings
import pandas as pd

import networkx as nx
from stellargraph.data.epgm import EPGM
from stellargraph.core.graph import *
from stellargraph import globalvar


def from_epgm(epgm_location, dataset_name=None, directed=False):
    """
    Imports a graph stored in EPGM format to a NetworkX object

    Args:
        epgm_location (str): The directory containing the EPGM data
        dataset_name (str), optional: The name of the dataset to import. If it
            is not given (or is empty), the label of the first graph head is
            used and a ``RuntimeWarning`` is issued.
        directed (bool): If True, load as a directed graph, otherwise
            load as an undirected graph

    Returns:
        A NetworkX graph containing the data for the EPGM-stored graph.

    Raises:
        ValueError: If no graph head has a label equal to ``dataset_name``.
    """
    G_epgm = EPGM(epgm_location)
    graphs = G_epgm.G["graphs"]

    # if dataset_name is not given, use the name of the 1st graph head
    if not dataset_name:
        dataset_name = graphs[0]["meta"]["label"]
        warnings.warn(
            "dataset name not specified, using dataset '{}' in the 1st graph head".format(
                dataset_name
            ),
            RuntimeWarning,
            # attribute the warning to the caller rather than to this module
            stacklevel=2,
        )

    # Select graph using dataset_name
    matching_ids = [g["id"] for g in graphs if g["meta"]["label"] == dataset_name]
    if not matching_ids:
        # Without this check the lookup below would fail with an opaque
        # UnboundLocalError; report the problem and the valid choices instead.
        raise ValueError(
            "dataset '{}' not found in '{}'; available datasets: {}".format(
                dataset_name,
                epgm_location,
                [g["meta"]["label"] for g in graphs],
            )
        )
    # When several graph heads share the label, the last one wins (this is what
    # the previous un-broken loop did).
    graph_id = matching_ids[-1]

    # Convert to StellarGraph (via nx)
    Gnx = G_epgm.to_nx(graph_id, directed=directed)

    print(
        "Graph statistics: {} nodes, {} edges".format(
            Gnx.number_of_nodes(), Gnx.number_of_edges()
        )
    )
    return Gnx
def load_dataset_BlogCatalog3(location):
    """
    This method loads the BlogCatalog3 network dataset (http://socialcomputing.asu.edu/datasets/BlogCatalog3)
    into a networkx undirected heterogeneous graph.

    .. deprecated::
        Replaced by ``BlogCatalog3().load()``; calling this function emits a
        ``DeprecationWarning``.

    The graph has two types of nodes, 'user' and 'group', and two types of edges, 'friend' and 'belongs'.
    The 'friend' edges connect two 'user' nodes and the 'belongs' edges connects 'user' and 'group' nodes.

    The node and edge types are not included in the dataset that is a collection of node and group ids along with
    the list of edges in the graph.

    Important note about the node IDs: The dataset uses integers for node ids. However, the integers from 1 to 39 are
    used as IDs for both users and groups. This would cause a confusion when constructing the networkx graph object.
    As a result, we convert all IDs to string and append the character 'u' to the integer ID for user nodes and the
    character 'g' to the integer ID for group nodes.

    Args:
        location: <str> The directory where the dataset is located
            (a leading ``~`` is expanded to the user's home directory)

    Returns:
        A networkx Graph object.
    """
    warnings.warn(
        "load_dataset_BlogCatalog3 has been replaced by `BlogCatalog3().load()`",
        DeprecationWarning,
        # Attribute the warning to the caller: with the default stacklevel it is
        # attributed to this library module and hidden by Python's default
        # DeprecationWarning filter, so users would never see it.
        stacklevel=2,
    )
    # Imported at call time, as in the original implementation.
    from stellargraph.datasets import BlogCatalog3

    location = os.path.expanduser(location)
    return BlogCatalog3._load_from_location(location).to_networkx(feature_name=None)