Handling Graphs with NetworkX¶

Python offers the library NetworkX for manipulating graphs. You can learn more here:

https://networkx.github.io/

https://networkx.github.io/documentation/stable/tutorial.html

In [2]:
import networkx as nx
import matplotlib.pyplot as plt

%matplotlib inline

Creating a graph¶

In [31]:
G = nx.Graph()

Add nodes to the graph

In [32]:
G.add_node(1)
G.add_nodes_from([2,3])
G.add_node('Alice')
G.add_node('Bob')
print(G.nodes())
[1, 2, 3, 'Alice', 'Bob']

Draw the graph

In [33]:
nx.draw_networkx(G)

Add edges to the graph

In [34]:
G.add_edge(1,2)
G.add_edges_from([(1,3),('Alice','Bob')])
e = (1,'Alice')
G.add_edge(*e)
In [35]:
nx.draw_networkx(G)

Adding an edge with a new node will create the node Charlie in the graph

In [36]:
G.add_edge('Alice','Charlie')
print(G.edges())
print(G.nodes())
nx.draw_networkx(G)
[(1, 2), (1, 3), (1, 'Alice'), ('Alice', 'Bob'), ('Alice', 'Charlie')]
[1, 2, 3, 'Alice', 'Bob', 'Charlie']

A graph is a dictionary with nodes as the keys

Each node is a dictionary with the neighbors as the keys, and the edge properties as values

In [37]:
type(G)
Out[37]:
networkx.classes.graph.Graph
In [40]:
for n in G: print(G[n])
{2: {}, 3: {}, 'Alice': {}}
{1: {}}
{1: {}}
{'Bob': {}, 1: {}, 'Charlie': {}}
{'Alice': {}}
{'Alice': {}}
In [38]:
G[1]
Out[38]:
AtlasView({2: {}, 3: {}, 'Alice': {}})
In [41]:
print(G.nodes)
print(G.edges)
[1, 2, 3, 'Alice', 'Bob', 'Charlie']
[(1, 2), (1, 3), (1, 'Alice'), ('Alice', 'Bob'), ('Alice', 'Charlie')]
In [88]:
print(G.nodes[1])
{}

Creating a graph from edges

In [42]:
G2 = nx.Graph()
G2.add_edges_from([(1,2),(1,3),('Alice','Bob'),(1,'Alice')])
print(G2.nodes())
print(G2.edges())
nx.draw_networkx(G2)
[1, 2, 3, 'Alice', 'Bob']
[(1, 2), (1, 3), (1, 'Alice'), ('Alice', 'Bob')]
In [43]:
G2.remove_edge(1,3)
G2.remove_node(3)
G2.remove_node(1)
print(G2.nodes())
print(G2.edges())
nx.draw_networkx(G2)
[2, 'Alice', 'Bob']
[('Alice', 'Bob')]

Reading a graph from a file with list of edges

https://networkx.org/documentation/stable/reference/readwrite/index.html

In [73]:
#Read a graph from a list of edges
G3 = nx.read_edgelist('graph_edges.txt')
print(G3.nodes())
print(G3.edges())
nx.draw_networkx(G3)
['1', '2', '3', 'Alice', 'Bob', 'Charlie']
[('1', '2'), ('1', '3'), ('1', 'Alice'), ('2', '3'), ('Alice', 'Bob'), ('Bob', 'Charlie')]

Graph attributes¶

You can assign attributes and values to the nodes of the graph

In [48]:
G3.nodes['Alice']['gender'] = 'female'
G3.nodes['Bob']['gender'] = 'male'
G3.nodes['Charlie']['gender'] = 'male'
G3.nodes['1']['value'] = 1
G3.nodes['2']['value'] = -1
G3.nodes['3']['value'] = 0
for n in G3.nodes():
    print(G3.nodes[n]) 
{'value': 1}
{'value': -1}
{'value': 0}
{'gender': 'female'}
{'gender': 'male'}
{'gender': 'male', 'value': 1}
In [47]:
for n in G3.nodes():
    print(G3[n])
{'2': {}, '3': {}, 'Alice': {}}
{'1': {}, '3': {}}
{'1': {}, '2': {}}
{'Bob': {}, '1': {}}
{'Alice': {}, 'Charlie': {}}
{'Bob': {}}
In [49]:
G3.nodes['Alice']['value'] = 1
G3.nodes['Bob']['value'] = -1
G3.nodes['Charlie']['value'] = 1
for n in G3.nodes():
    print(n+ ":" + str(G3.nodes[n]['value']))

for n in G3.nodes():
    print(n,G3.nodes[n])
1:1
2:-1
3:0
Alice:1
Bob:-1
Charlie:1
1 {'value': 1}
2 {'value': -1}
3 {'value': 0}
Alice {'gender': 'female', 'value': 1}
Bob {'gender': 'male', 'value': -1}
Charlie {'gender': 'male', 'value': 1}

You can also add attributes and values to the edges in the graph

In [50]:
G3['Alice']['Bob']['label'] = 'strong'
print(G3['Bob']['Alice'])
print(G3['Alice'])
print(G3['Bob'])
{'label': 'strong'}
{'Bob': {'label': 'strong'}, '1': {}}
{'Alice': {'label': 'strong'}, 'Charlie': {}}
In [52]:
pos=nx.spring_layout(G3)
nx.draw_networkx(G3, pos)
labels = nx.get_edge_attributes(G3,'label')
print(labels)
nx.draw_networkx_edge_labels(G3, pos, edge_labels=labels)
#plt.show()
{('Alice', 'Bob'): 'strong'}
Out[52]:
{('Alice', 'Bob'): Text(0.3201819780365797, 0.3734059405104533, 'strong')}

Weighted graphs¶

A special attribute of a an edge is the "weight". When adding weighted edges, you enter triples consisting of the two edge endpoints and the weight of the edge. This weight is stored in an attribute "weight" by default.

In [53]:
G4 = nx.Graph()
G4.add_weighted_edges_from([(1,2,0.5),(2,3,0.1),(3,4,0.7)])
for (a,b) in G4.edges():
    print (G4[a][b])
for (a,b,w) in G4.edges(data =True): #data=True returns weight as well
    print (str(a)+" "+ str(b) + " " + str(w['weight']))
for n in G4:
    print(G4[n])
{'weight': 0.5}
{'weight': 0.1}
{'weight': 0.7}
1 2 0.5
2 3 0.1
3 4 0.7
{2: {'weight': 0.5}}
{1: {'weight': 0.5}, 3: {'weight': 0.1}}
{2: {'weight': 0.1}, 4: {'weight': 0.7}}
{3: {'weight': 0.7}}
In [55]:
pos=nx.spring_layout(G4)
nx.draw_networkx(G4, pos)
weights = nx.get_edge_attributes(G4,'weight')
nx.draw_networkx_edge_labels(G4, pos, edge_labels=weights)
Out[55]:
{(1, 2): Text(0.3279379965905198, 0.7527674124114143, '0.5'),
 (2, 3): Text(-0.006399362968921046, -0.014497447633368132, '0.1'),
 (3, 4): Text(-0.3279379965905198, -0.7527674124114143, '0.7')}

Directed Graphs¶

In [59]:
DG=nx.DiGraph()
DG.add_weighted_edges_from([(1,2,0.5), (3,1,0.75), (1,4,0.1)])
print(DG.edges())
for n in DG:
    print(n, DG[n])

pos=nx.spring_layout(DG)
nx.draw_networkx(DG,pos)
weights = nx.get_edge_attributes(DG,'weight')
nx.draw_networkx_edge_labels(DG, pos, edge_labels=weights)
[(1, 2), (1, 4), (3, 1)]
1 {2: {'weight': 0.5}, 4: {'weight': 0.1}}
2 {}
3 {1: {'weight': 0.75}}
4 {}
Out[59]:
{(1, 2): Text(-0.5409094167445062, 0.11714670129479471, '0.5'),
 (1, 4): Text(0.36959732716325744, -0.15532358056810397, '0.1'),
 (3, 1): Text(-0.08949325609223646, 0.14233855382008131, '0.75')}

You can transform a directed graph into an undirected on using to_undirected()

In [60]:
UG = DG.to_undirected()
print(UG.edges())
for n in UG:
    print(n, UG[n])
nx.draw_networkx(UG)
[(1, 2), (1, 4), (1, 3)]
1 {2: {'weight': 0.5}, 4: {'weight': 0.1}, 3: {'weight': 0.75}}
2 {1: {'weight': 0.5}}
3 {1: {'weight': 0.75}}
4 {1: {'weight': 0.1}}

Graph Operations¶

Some common graph operations and algorithms are implemented in networkx library.

http://networkx.readthedocs.io/en/networkx-1.11/reference/algorithms.html

Neighbors, degrees and adjancency matrix

In [61]:
nx.draw_networkx(G)
In [63]:
print(G.neighbors(1)) # returns the neighbors of a node
for x in G.neighbors(1):
    print(x)
print('degree of 1:',G.degree(1)) # returns the degree of a node
<dict_keyiterator object at 0x0000018237D5CCC0>
2
3
Alice
degree of 1: 3
In [64]:
A = nx.adjacency_matrix(G)
print(A) 
#the adjacency matrix is stored as a sparse matrix
print(type(A))
  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (1, 0)	1
  (2, 0)	1
  (3, 0)	1
  (3, 4)	1
  (3, 5)	1
  (4, 3)	1
  (5, 3)	1
<class 'scipy.sparse.csr.csr_matrix'>
In [65]:
print(G4[3])
print(G4.degree(3, weight='weight'))
print(G4.degree(3))
{2: {'weight': 0.1}, 4: {'weight': 0.7}}
0.7999999999999999
2

Neighbors and degrees for directed or weighted graphs

In [69]:
pos=nx.spring_layout(DG)
nx.draw_networkx(DG,pos)
weights = nx.get_edge_attributes(DG,'weight')
nx.draw_networkx_edge_labels(DG, pos, edge_labels=weights)
Out[69]:
{(1, 2): Text(-0.015005894359660137, -0.4925013339888995, '0.5'),
 (1, 4): Text(0.16943389942815204, 0.41334984648908324, '0.1'),
 (3, 1): Text(-0.1491277106951322, -0.09414881952201734, '0.75')}
In [68]:
print(list(DG.successors(1)))
print(list(DG.neighbors(1)))
print(list(DG.predecessors(1)))
print(DG.out_degree(1))
print(DG.in_degree(1))
print(DG.out_degree(1,weight='weight'))
[2, 4]
[2, 4]
[3]
2
1
0.6

Connected components¶

In [74]:
nx.draw_networkx(G3)
In [75]:
G3.add_edge('1','Alice')
G3.remove_edge('1','Alice') #you can also remove_node, or a list of nodes or edges (remove_nodes_from, remove_edges_from)
G3.add_edge('Alice','Charlie')
G3.add_edge('1','4')
nx.draw_networkx(G3)
print(nx.number_connected_components(G3))
2
In [76]:
C = nx.connected_components(G3)
print(type(C))
for c in C:
    print(c)
<class 'generator'>
{'3', '4', '1', '2'}
{'Bob', 'Charlie', 'Alice'}

Get the connected component subgraphs

In [77]:
connected_subgraphs = [nx.subgraph(G3,c) for c in nx.connected_components(G3)]
for GC in connected_subgraphs:
    print(GC.nodes())
    print(GC.edges())
    print(len(GC))
['1', '2', '3', '4']
[('1', '2'), ('1', '3'), ('1', '4'), ('2', '3')]
4
['Bob', 'Charlie', 'Alice']
[('Bob', 'Alice'), ('Bob', 'Charlie'), ('Charlie', 'Alice')]
3

Get the largest connected component

In [78]:
# Get the nodes

largest_cc = max(nx.connected_components(G3), key=len)
print(largest_cc)
{'3', '4', '1', '2'}
In [79]:
#Get the subgraph

largest_cc = max(nx.connected_components(G3), key=len)
print(largest_cc)
CC_max = nx.subgraph(G3,largest_cc)
nx.draw_networkx(CC_max)
{'3', '4', '1', '2'}

Shortest paths¶

In [80]:
G3.add_edge('1','Alice')
nx.draw_networkx(G3)
sp = nx.shortest_path(G3,'3','Bob')
print(sp)
print(len(sp)-1)
print(nx.shortest_path_length(G3,'3','Bob'))
['3', '1', 'Alice', 'Bob']
3
3
In [81]:
SP1 = nx.single_source_shortest_path(G3,'1')
print(SP1)
#print(nx.single_source_shortest_path_length(G3,'1'))
{'1': ['1'], '2': ['1', '2'], '3': ['1', '3'], '4': ['1', '4'], 'Alice': ['1', 'Alice'], 'Bob': ['1', 'Alice', 'Bob'], 'Charlie': ['1', 'Alice', 'Charlie']}
In [82]:
SP = dict(nx.all_pairs_shortest_path(G3))
print(SP)
{'1': {'1': ['1'], '2': ['1', '2'], '3': ['1', '3'], '4': ['1', '4'], 'Alice': ['1', 'Alice'], 'Bob': ['1', 'Alice', 'Bob'], 'Charlie': ['1', 'Alice', 'Charlie']}, '2': {'2': ['2'], '1': ['2', '1'], '3': ['2', '3'], '4': ['2', '1', '4'], 'Alice': ['2', '1', 'Alice'], 'Bob': ['2', '1', 'Alice', 'Bob'], 'Charlie': ['2', '1', 'Alice', 'Charlie']}, '3': {'3': ['3'], '1': ['3', '1'], '2': ['3', '2'], '4': ['3', '1', '4'], 'Alice': ['3', '1', 'Alice'], 'Bob': ['3', '1', 'Alice', 'Bob'], 'Charlie': ['3', '1', 'Alice', 'Charlie']}, 'Alice': {'Alice': ['Alice'], 'Bob': ['Alice', 'Bob'], 'Charlie': ['Alice', 'Charlie'], '1': ['Alice', '1'], '2': ['Alice', '1', '2'], '3': ['Alice', '1', '3'], '4': ['Alice', '1', '4']}, 'Bob': {'Bob': ['Bob'], 'Alice': ['Bob', 'Alice'], 'Charlie': ['Bob', 'Charlie'], '1': ['Bob', 'Alice', '1'], '2': ['Bob', 'Alice', '1', '2'], '3': ['Bob', 'Alice', '1', '3'], '4': ['Bob', 'Alice', '1', '4']}, 'Charlie': {'Charlie': ['Charlie'], 'Bob': ['Charlie', 'Bob'], 'Alice': ['Charlie', 'Alice'], '1': ['Charlie', 'Alice', '1'], '2': ['Charlie', 'Alice', '1', '2'], '3': ['Charlie', 'Alice', '1', '3'], '4': ['Charlie', 'Alice', '1', '4']}, '4': {'4': ['4'], '1': ['4', '1'], '2': ['4', '1', '2'], '3': ['4', '1', '3'], 'Alice': ['4', '1', 'Alice'], 'Bob': ['4', '1', 'Alice', 'Bob'], 'Charlie': ['4', '1', 'Alice', 'Charlie']}}
In [83]:
print(SP['1']['Bob'])
['1', 'Alice', 'Bob']

Link Analysis¶

https://networkx.github.io/documentation/stable/reference/algorithms/link_analysis.html

In [84]:
DG2 = nx.DiGraph()
DG2.add_edges_from([(1,2),(1,3),(3,2),(2,5),(4,1),(4,2),(4,3),(5,1),(5,4)])
nx.draw_networkx(DG2)

Pagerank¶

The result of the Pagerank algorithm is a dictionary with the node ids as keys, and the pagerank values as values.

In [85]:
pr = nx.pagerank(DG2)
print(pr)

pr = nx.pagerank(G3)
print(pr)
{1: 0.18064505060873784, 2: 0.27131643087724033, 3: 0.14665711544131713, 5: 0.26061906832422166, 4: 0.140762334748483}
{'1': 0.2420308196543696, '2': 0.12671382905463274, '3': 0.12671382905463274, 'Alice': 0.17987731223897474, 'Bob': 0.12590180503903042, 'Charlie': 0.12590180503903042, '4': 0.0728605999193293}

HITS¶

In [86]:
[h,a] = nx.hits(DG2)
print(h)
print(a)
print(a[2])
{1: 0.30284190939588396, 2: 0.0, 3: 0.16745199268671332, 5: 0.1254412261267391, 4: 0.4042648717906636}
{1: 0.23681287910395024, 2: 0.39098432508292885, 3: 0.3161224561036187, 5: 0.0, 4: 0.05608033970950211}
0.39098432508292885

Pesronalized Pagerank¶

We will see the example we presented in class

In [3]:
G4 = nx.read_edgelist('graph-example.txt')
nx.draw_networkx(G4)

To define the personalized Pagerank, we will use the "personalization" parameter, were we define the jump vector as a dictionary, with node ids as the keys, and the jump probability to each node as the values

In [4]:
print(nx.pagerank(G4))
print(nx.pagerank(G4, personalization = {'1':1}))
print(nx.pagerank(G4, personalization = {'6':1}))
{'1': 0.12755099866537586, '2': 0.18232887836414127, '3': 0.2394858218322733, '4': 0.18433868256410274, '5': 0.1326787034099837, '6': 0.13361691516412283}
{'1': 0.2583391334031776, '2': 0.20094608623757965, '3': 0.24190126489812175, '4': 0.14028792201015408, '5': 0.0833530743914921, '6': 0.07517251905947459}
{'1': 0.07517273333798744, '2': 0.12532699696556127, '3': 0.1866533325726192, '4': 0.18958031704255388, '5': 0.15407113246217508, '6': 0.26919548761910295}

We will now change the jump probability. To change the jump probability we will set the parameter alpha.

Attention: The parameter alpha is not the jump probability as we have described in class, but the probability of following an out-link. The jump probability is 1-alpha.

In [5]:
print(nx.pagerank(G4, alpha = 0.5))
print(nx.pagerank(G4, alpha = 0.5, personalization = {'1':1}))
print(nx.pagerank(G4, alpha = 0.5, personalization = {'6':1}))
{'1': 0.1390335398088414, '2': 0.1741686298166486, '3': 0.2133752383994728, '4': 0.17643097727782997, '5': 0.14740281022602297, '6': 0.14958880447118414}
{'1': 0.5510064790190851, '2': 0.16964493982349704, '3': 0.18185949066208873, '4': 0.054964764048023335, '5': 0.026690643658381363, '6': 0.015833682788924434}
{'1': 0.015833407896886878, '2': 0.039357701756408944, '3': 0.07419162031220411, '4': 0.15675161523961573, '5': 0.150192049257395, '6': 0.5636736055374894}

Betweeness¶

In [132]:
BC = nx.edge_betweenness_centrality(G3)
print(BC)
{('1', '2'): 0.23809523809523808, ('1', '3'): 0.23809523809523808, ('1', '4'): 0.2857142857142857, ('1', 'Alice'): 0.5714285714285714, ('2', '3'): 0.047619047619047616, ('Alice', 'Bob'): 0.23809523809523808, ('Alice', 'Charlie'): 0.23809523809523808, ('Bob', 'Charlie'): 0.047619047619047616}

Drawing Graphs¶

https://networkx.org/documentation/stable/reference/drawing.html

In [90]:
nx.draw_networkx(G3)
In [95]:
nx.draw(G3, with_labels = True)
In [92]:
nx.draw_circular(G3)
In [93]:
nx.draw_spectral(G3, with_labels=True)
In [96]:
nx.draw_spring(G3)
In [97]:
nx.draw_spring(DG2)

An example¶

In [98]:
karate=nx.read_gml("karate.gml",label='id')
nx.draw_networkx(karate)
In [99]:
nx.draw_spring(karate,with_labels=True)

Change the size of the nodes depending on their pagerank value

In [100]:
pr = nx.pagerank(karate)
nx.draw_networkx(karate,node_size=[10000*pr[x] for x in pr])

A PageRank implementation¶

We will now do our own implementation of Pagerank. Pagerank values for node $i$ are computed by iterating the following formula:

$$p_i = 0.85\sum_{j\rightarrow i}\frac{p_j}{d_{out}(j)} +0.15\frac 1n$$

We will associate each node with two values: the old pagerank in the previous step and the new one that is currently computed. We initialize the old pagerank to $1/n$

In [101]:
for x in karate.nodes:
    karate.nodes[x]['old_pr'] = 1/len(karate.nodes)
    karate.nodes[x]['pr'] = 0

The algorithm goes over the edges in the graph, and for each edge (x,y) transfers a fraction of the Pagerank of x to y (and vice versa since the graph is undirected).

For convergece check we want the maximum difference between old and new Pagerank values to be less than eps.

In [147]:
eps = 0.0000001
while (True):
    for (x,y) in karate.edges:
        karate.nodes[y]['pr'] += karate.nodes[x]['old_pr']/karate.degree(x)
        karate.nodes[x]['pr'] += karate.nodes[y]['old_pr']/karate.degree(y)

    diff = 0
    for x in karate.nodes:
        karate.nodes[x]['pr'] = karate.nodes[x]['pr']*0.85 + 0.15/len(karate.nodes)
        diff = max(diff, abs(karate.nodes[x]['pr'] - karate.nodes[x]['old_pr']))
    if diff < eps: break
        
    for x in karate.nodes:
        karate.nodes[x]['old_pr'] = karate.nodes[x]['pr']
        karate.nodes[x]['pr'] = 0
        
print({x:karate.nodes[x]['pr'] for x in karate.nodes})
{1: 0.09699768951770414, 2: 0.052877025190054515, 3: 0.05707849169631034, 4: 0.03585995583805577, 5: 0.021978125705124495, 6: 0.029111415561523094, 7: 0.029111415561523094, 8: 0.024490566882881017, 9: 0.029766022059362227, 10: 0.014309373641885614, 11: 0.021978125705124495, 12: 0.009564779215464548, 13: 0.014644945058585735, 14: 0.029536503714110846, 15: 0.01453594661310139, 16: 0.01453594661310139, 17: 0.01678415323052132, 18: 0.014558728934986399, 19: 0.01453594661310139, 20: 0.01960466576621623, 21: 0.01453594661310139, 22: 0.014558728934986399, 23: 0.01453594661310139, 24: 0.03152236472056747, 25: 0.021075945456452493, 26: 0.021006101393715268, 27: 0.015043975625369849, 28: 0.025639673598356803, 29: 0.019573410373247335, 30: 0.026288410971252903, 31: 0.024590112854779363, 32: 0.037157989139649, 33: 0.07169286024483915, 34: 0.10091871034184292}

We got essentially the same values as for the Pagerank vector.

In [148]:
pr
Out[148]:
{1: 0.09700181758983709,
 2: 0.05287839103742701,
 3: 0.057078423047636745,
 4: 0.03586064322306479,
 5: 0.021979406974834498,
 6: 0.02911334166344221,
 7: 0.02911334166344221,
 8: 0.024490758039509182,
 9: 0.029765339186167028,
 10: 0.014308950284462801,
 11: 0.021979406974834498,
 12: 0.009564916863537148,
 13: 0.014645186487916191,
 14: 0.029536314977202986,
 15: 0.014535161524273825,
 16: 0.014535161524273825,
 17: 0.016785378110253487,
 18: 0.014558859774243493,
 19: 0.014535161524273825,
 20: 0.019604416711937293,
 21: 0.014535161524273825,
 22: 0.014558859774243493,
 23: 0.014535161524273825,
 24: 0.03152091531163228,
 25: 0.021075455001162945,
 26: 0.021005628174745786,
 27: 0.015043395360629753,
 28: 0.025638803528350497,
 29: 0.01957296050943854,
 30: 0.02628726283711208,
 31: 0.02458933653429248,
 32: 0.03715663592267942,
 33: 0.07169213006588289,
 34: 0.1009179167487121}
In [ ]: