Python English Version


Imported libraries

    import numpy as np
    import matplotlib.pyplot as plt
    import networkx as nx
    import re

The flow network analysis algorithms

Import and visualize network data

Construct flow network from log file

demo data:

 dd=[['a',0],['a',1],['a',2],['b',1],['b',2],['c',1],['c',2],['c',3],['d',2],['d',3],
 ['e',0],['e',4],['f',0],['f',4],['g',0],['g',4],['g',5],['h',0],['h',5],['i',6]]

function definition:

    def constructFlowNetwork (data):
        # data: rows of [cookie, url], grouped by cookie and ordered by time;
        # consecutive clicks of one cookie become weighted edges, and the last
        # click of every session is linked to the "sink" node.
        LastCookie=data[0][0]
        LastUrl=str(data[0][1])
        ed={}
        for i in data[1:]:
            cookie = i[0]
            url = str(i[1])
            if cookie == LastCookie:
                eg = LastUrl + "\t" + url
            else:
                eg = LastUrl + "\t" + "sink"
            if eg in ed:
                ed[eg] += 1
            else:
                ed[eg] = 1
            LastCookie=cookie
            LastUrl=url
        # close the last session in the log
        fe = str(data[-1][1]) + "\t" + "sink"
        if fe in ed:
            ed[fe] += 1
        else:
            ed[fe] = 1
        G=nx.DiGraph()
        for i in ed.items():
            a=re.split("\t",i[0])
            G.add_edge(a[0],a[1],weight=i[1])
        return G

function usage:

    G = constructFlowNetwork(dd)

Visualize weighted networks

    n = G.number_of_edges()   
    pos = nx.spring_layout(G) 
    colors =range(n)
    nx.draw(G, pos, node_size = 5, node_color = "#A0CBE2",edge_color=colors, width =1,edge_cmap=plt.cm.Blues, with_labels= False)  
    #plt.savefig("E:/exampleNetwork.png")

The basic quantities of flow networks

Calculate the inflow and outflow of a node

outflow

    def outflow(G,node):
        n=0
        for i in G.edges(node):
            n+=G[i[0]][i[1]].values()[0]
        return n

inflow

    def inflow(G,node):
        return outflow(G.reverse(), node)
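
function usage

A minimal check on the demo network G built above; the node label '0' is simply one of the page ids appearing in dd.

    print(outflow(G, '0'))   # total weight leaving node '0'
    print(inflow(G, '0'))    # total weight entering node '0'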

Balance a flow network

function definition

    def flowBalancing(G):
        RG = G.reverse()
        H = G.copy()
        def nodeBalancing(node):
            outw=0
            for i in G.edges(node):
                outw+=G[i[0]][i[1]].values()[0]
            inw=0
            for i in RG.edges(node):
                inw+=RG[i[0]][i[1]].values()[0]
            deltaflow=inw-outw
            if deltaflow > 0:
                H.add_edge(node, "sink",weight=deltaflow)
            elif deltaflow < 0:
                H.add_edge("source", node, weight=abs(deltaflow))
            else:
                pass
        for i in G.nodes():
            nodeBalancing(i)
        H.remove_edge("sink", "sink")
        return H

function usage

    H = flowBalancing(G)
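
As a quick sanity check (a sketch reusing the inflow and outflow helpers defined above), every node of the balanced network except "source" and "sink" now has equal inflow and outflow:

    for node in H.nodes():
        if node in ("source", "sink"):
            continue
        assert inflow(H, node) == outflow(H, node)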

Obtain the dissipation on each node

function definition

    def networkDissipation(G):
        d=[]
        RG = G.reverse()
        def nodeDissipate(node):
            toSink=0
            outw=0
            for i in G.edges(node):
                outw += G[i[0]][i[1]].values()[0]
                if i[1]=="sink":
                    toSink += G[i[0]][i[1]].values()[0]
            inw=0
            for i in RG.edges(node):
                inw+=RG[i[0]][i[1]].values()[0]
            fromSource=outw-inw
            totalflow=max(inw,outw)
            d.append([totalflow,toSink,fromSource])
        for i in G.nodes():
            if i=="sink":
                continue
            nodeDissipate(i)
        return d

function usage

    D = networkDissipation(G)
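
Each row of the result is [totalflow, toSink, fromSource] for one non-sink node, so (as a sketch) the total dissipation of the network can be read off by summing the second column:

    D = np.array(networkDissipation(G))
    toSink = D[:,1]
    print(toSink.sum())    # total flow dissipated to the sink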

Derive fundamental matrix based on Markov analysis

import libraries

    from numpy import linalg as LA
    from numpy import delete

function definition

    def getUmatrix(G):
        # M is the row-normalized flow matrix among transient states;
        # U = (I - M)^{-1} is the fundamental matrix with source/sink removed.
        H = flowBalancing(G)
        F1=nx.to_numpy_matrix(H)
        sourcep=H.nodes().index('source')
        sinkp=H.nodes().index('sink')
        if sinkp > sourcep:
            F1[sinkp,sinkp]=1
            M = F1 / F1.sum(axis=1)
            M = delete(M, sinkp, 0)
            M = delete(M, sinkp, 1)
            I = np.identity(len(M))
            U = LA.inv( I - M)
            U = delete(U, sourcep, 0)
            U = delete(U, sourcep, 1)
        else:
            F1[sinkp,sinkp]=1
            M = F1 / F1.sum(axis=1)
            M = delete(M, sinkp, 0)
            M = delete(M, sinkp, 1)
            I = np.identity(len(M))
            U = LA.inv( I - M)
            U = delete(U, sourcep-1, 0)
            U = delete(U, sourcep-1, 1)
        return U

function usage

 
    U = getUmatrix(G)
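
In the standard absorbing-Markov-chain reading (a note, not part of the original code), M is the row-normalized flow matrix with the "sink" state made absorbing and then removed, and

    U = (I - M)^{-1}

so that U[i,j] gives the expected number of visits to state j, starting from state i, before the flow is absorbed by the sink; the row and column for "source" are dropped at the end.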

Calculate the average flow length

function definition

    def flowLength(G):
        H = flowBalancing(G)
        F1=nx.to_numpy_matrix(H)
        sp=H.nodes().index('sink')
        F1[sp,sp]=1
        M = F1 / F1.sum(axis=1)
        M = delete(M, sp, 0) 
        M = delete(M, sp, 1) 
        I = np.identity(len(M))
        U =  LA.inv( I - M)
        L = np.sum(U[0,])
        return L

function usage

    L = flowLength(G)

Obtain the balanced flow and the circulated flow controlled by each node

import libraries

    from numpy import linalg as LA
    from numpy import delete

function definition

    def AICI(G):
        H = flowBalancing(G)
        F1=nx.to_numpy_matrix(H)
        sourcep=H.nodes().index('source')
        sinkp=H.nodes().index('sink')
        F1oJ=F1[sourcep,]
        AI = F1.sum(0)
        if sinkp > sourcep:
            F1oJ=delete(F1oJ,sourcep,1)
            F1oJ=delete(F1oJ,sinkp-1,1)
            AI=delete(AI,sourcep,1)
            AI=delete(AI,sinkp-1,1)
            F1[sinkp,sinkp]=1
            M = F1 / F1.sum(axis=1)
            M = delete(M, sinkp, 0) 
            M = delete(M, sinkp, 1) 
            I = np.identity(len(M))
            U =  LA.inv( I - M)
            U = delete(U, sourcep, 0) 
            U = delete(U, sourcep, 1)
        else:
            F1oJ=delete(F1oJ,sinkp,1)
            F1oJ=delete(F1oJ,sourcep-1,1)
            AI=delete(AI,sinkp,1)
            AI=delete(AI,sourcep-1,1)
            F1[sinkp,sinkp]=1
            M = F1 / F1.sum(axis=1)
            M = delete(M, sinkp, 0) 
            M = delete(M, sinkp, 1) 
            I = np.identity(len(M))
            U =  LA.inv( I - M)
            U = delete(U, sourcep-1, 0) 
            U = delete(U, sourcep-1, 1)     
        def calculateCi(i):
            Ci =sum(U[i,:])*sum(np.dot(F1oJ,U[:,i]))/U[i,i]
            return Ci
        CI = map( lambda x:calculateCi(x),range(len(U)) )
        AI = AI.tolist()[0]
        return np.transpose(np.vstack((AI,CI)))

function usage

    AICIData = AICI(G)
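
The result has one row per node other than "source" and "sink" (in the node order of H.nodes()); a minimal way to unpack the two columns, the balanced flow A_i and the circulated flow C_i:

    AI, CI = AICIData[:,0], AICIData[:,1]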

Network re-normalization techniques

Calculate the box fractal dimension of networks

import libraries

    from math import *
    from networkx import *
    import random
    from copy import deepcopy
    import os, sys
    import time
    import re
    import statsmodels.api as sm

function copyright

    #------download address:lev[dot]ccny[dot]cuny[dot]edu/~hmakse/modules[dot]py-----------------#
    '''
    Comments:
    If you have any questions or find a bug write Hernan Rozenfeld an email at
    hernanrozenfeld at domain gmail with the usual "dot com" at the end. 
    Code in Python (written by Hernan Rozenfeld):
    For this code to run you'll need to install Python and Networkx. 
    File "modules.py" contains algorithms MEMB, CBB, and random covering for
    network renormalization.
    '''

function definition

1 Compact box burning algorithm

    def CBB(G,lb): #This is the compact box burning algorithm.
        	"""
        	It returns a dictionary with {box_id:subgraph_generated_by_the_nodes_in_this_box}
        	The box_id is the center of the box.
        	The subgraphs may be disconnected.
        	"""	
        	uncovered_nodes=G.nodes()
        	uncovered_nodes = set(uncovered_nodes)
        	covered_nodes = set([])
        	boxes_subgraphs = {}
        	adj = G.adj
        	while uncovered_nodes:
        		center = random.choice(list(uncovered_nodes))
        		nodes_visited = {center:0}
        		search_queue = [center]
        		d = 1
        		while len(search_queue) > 0 and d <= lb-1:
        			next_depth = []
        			extend = next_depth.extend
        			for n in search_queue:
        				l = [ i for i in adj[n].iterkeys() if i not in nodes_visited ]
        				extend(l)
        				for j in l: 
        					nodes_visited[j] = d
        			search_queue = next_depth
        			d += 1
        		new_covered_nodes = set(nodes_visited.keys())
        		new_covered_nodes = new_covered_nodes.difference(covered_nodes)
        		nodes_checked_as_centers = set([center])
        		while len(nodes_checked_as_centers) < len(new_covered_nodes):
        			secondary_center = random.choice(list(new_covered_nodes.difference(nodes_checked_as_centers)))
        			nodes_checked_as_centers.add(secondary_center)
        			nodes_visited = {secondary_center:0}
        			search_queue = [secondary_center]
        			d = 1
        			while len(search_queue) > 0 and d <= lb-1:
        				next_depth = []
        				extend = next_depth.extend
        				for n in search_queue:
        					l = [ i for i in adj[n].iterkeys() if i not in nodes_visited ] # faster than has_key? yep
        					extend(l)
        					for j in l:
        						nodes_visited[j] = d
        				search_queue = next_depth
        				d += 1
        			nodes_covered_by_secondary = set(nodes_visited.keys())
        			new_covered_nodes = new_covered_nodes.intersection(nodes_covered_by_secondary)
        		boxes_subgraphs[center] = subgraph(G,list(new_covered_nodes))
        		uncovered_nodes = uncovered_nodes.difference(new_covered_nodes)
        		covered_nodes = covered_nodes.union(new_covered_nodes)
        	return boxes_subgraphs

2 Random box covering algorithm

    def random_box_covering(G,rb):
    	"""
    	It returns a dictionary with {box_id:subgraph_generated_by_the_nodes_in_this_box}
    	The box_id is the center of the box.
    	"""
    	H = deepcopy(G)
    	burned_nodes = []
    	unburned_nodes = G.nodes()
    	boxes_subgraphs = {}
    	adj = H.adj
    	while unburned_nodes:
    		center_node = random.choice(unburned_nodes) 
    		nodes_visited = [center_node]
    		search_queue = [center_node]
    		d = 1
    		while search_queue and d <= rb:
    			next_depth = []
    			extend = next_depth.extend
    			for n in search_queue:
    				l = [ i for i in adj[n].iterkeys() if i not in nodes_visited]
    				extend(l)
    				nodes_visited.extend(l)
    			search_queue = next_depth
    			d += 1
    		new_burned_nodes = nodes_visited#.keys()
    		H.remove_nodes_from(new_burned_nodes)
    		boxes_subgraphs[center_node] = subgraph(G,new_burned_nodes)
    		unburned_nodes = list(set(unburned_nodes)-set(new_burned_nodes))
    	return boxes_subgraphs

3 MEMB algorithm

    def MEMB(G,rb,cycle=0):
        	"""
        	It returns a dictionary with {box_id:subgraph_generated_by_the_nodes_in_this_box}
        	The box_id is the center of the box.
        	cycle: Ignore this parameter. Use the default cycle=0.
        	"""
        	adj = G.adj
        	number_of_nodes = G.number_of_nodes()
        	covered_nodes = set()
        	center_nodes = set()
        	non_center_nodes = G.nodes()
        	center_node_found = 0
        	boxes={} #this will be "box_id:[nodes in box]"
        	central_distance_of_node = {} #"node:central_distance"
        	node_box_id = {} #"node:box_id"
        	nodes_sorted_by_central_distance={} #Dict with {central_distance:[nodes]}
        	excluded_mass_of_non_centers_rb = {} #This contains [(node:excluded_mass)] for rb
        	excluded_mass_of_non_centers_rb2 = {} #This contains [(node:excluded_mass)] for rb+1
        	rb2 = rb + 1
        	for node in non_center_nodes:
        		#if node in [5000,10000,20000,30000]: print "node", node
        		level=0                  # the current level
        		nextlevel={node:1}       # list of nodes to check at next level
        		paths_rb=None
        		paths_rb2={node:[node]} # paths dictionary  (paths to key from source)
        		while nextlevel:
        			paths_rb = deepcopy(paths_rb2)
        			thislevel=nextlevel
        			nextlevel={}
        			for v in thislevel:
        				for w in G.neighbors(v):
        					if not paths_rb2.has_key(w):
        						paths_rb2[w]=paths_rb2[v]+[w]
        						nextlevel[w]=1
        			level=level+1
        			if (rb2 <= level):  break
        		excluded_mass_of_node = len(paths_rb2)
        		try:
        			excluded_mass_of_non_centers_rb2[excluded_mass_of_node].append(node)
        		except KeyError:
        			excluded_mass_of_non_centers_rb2[excluded_mass_of_node] = [node]
        		excluded_mass_of_node = len(paths_rb)
        		try:
        			excluded_mass_of_non_centers_rb[excluded_mass_of_node].append(node)
        		except KeyError:
        			excluded_mass_of_non_centers_rb[excluded_mass_of_node] = [node]
        	maximum_excluded_mass = 0
        	nodes_with_maximum_excluded_mass=[]
        	new_covered_nodes = {}
        	center_node_and_mass = []
        	cycle_index = 0
        	while len(covered_nodes) < number_of_nodes:
        		#print len(covered_nodes),number_of_nodes
        		cycle_index += 1
        		if cycle_index == cycle:
        			rb2 = rb+1
        			cycle_index = 0
        		else:
        			rb2 = rb
        		while 1:
        			if rb2 == rb+1:
        				#t1=time.time()
        				while 1:
        					maximum_key = max(excluded_mass_of_non_centers_rb2.keys())
        					node = random.choice(excluded_mass_of_non_centers_rb2[maximum_key])
        					if node in center_nodes:
        						excluded_mass_of_non_centers_rb2[maximum_key].remove(node)
        						if not excluded_mass_of_non_centers_rb2[maximum_key]: del excluded_mass_of_non_centers_rb2[maximum_key]
        					else:
        						break	
        				nodes_visited = {}
        				bfs = single_source_shortest_path(G,node,cutoff=rb2)
        				for i in bfs:
        					nodes_visited[i] = len(bfs[i])-1
        				excluded_mass_of_node = len(set(nodes_visited.keys()).difference(covered_nodes))
        				if excluded_mass_of_node == maximum_key:
        					center_node_and_mass = (node,maximum_key)
        					excluded_mass_of_non_centers_rb2[maximum_key].remove(node)
        					if not excluded_mass_of_non_centers_rb2[maximum_key]: del excluded_mass_of_non_centers_rb2[maximum_key]
        					new_covered_nodes = nodes_visited
        					break
        				else:
        					excluded_mass_of_non_centers_rb2[maximum_key].remove(node)
        					if not excluded_mass_of_non_centers_rb2[maximum_key]: del excluded_mass_of_non_centers_rb2[maximum_key]
        					try:
        						excluded_mass_of_non_centers_rb2[excluded_mass_of_node].append(node)
        					except KeyError:
        						excluded_mass_of_non_centers_rb2[excluded_mass_of_node] = [node]
        				#print "time", time.time()-t1
        			else:
        				#t1=time.time()
        				while 1:
        					maximum_key = max(excluded_mass_of_non_centers_rb.keys())
        					node = random.choice(excluded_mass_of_non_centers_rb[maximum_key])
        					if node in center_nodes:
        						excluded_mass_of_non_centers_rb[maximum_key].remove(node)
        						if not excluded_mass_of_non_centers_rb[maximum_key]: del excluded_mass_of_non_centers_rb[maximum_key]
        					else:
        						break	
        				nodes_visited = {}
        				bfs = single_source_shortest_path(G,node,cutoff=rb)
        				for i in bfs:
        					nodes_visited[i] = len(bfs[i])-1
        				excluded_mass_of_node = len(set(nodes_visited.keys()).difference(covered_nodes))
        				if excluded_mass_of_node == maximum_key:
        					center_node_and_mass = (node,maximum_key)
        					excluded_mass_of_non_centers_rb[maximum_key].remove(node)
        					if not excluded_mass_of_non_centers_rb[maximum_key]: del excluded_mass_of_non_centers_rb[maximum_key]
        					new_covered_nodes = nodes_visited
        					break
        				else:
        					excluded_mass_of_non_centers_rb[maximum_key].remove(node)
        					if not excluded_mass_of_non_centers_rb[maximum_key]: del excluded_mass_of_non_centers_rb[maximum_key]
        					try:
        						excluded_mass_of_non_centers_rb[excluded_mass_of_node].append(node)
        					except KeyError:
        						excluded_mass_of_non_centers_rb[excluded_mass_of_node] = [node]
        				#print "time", time.time()-t1
        #					
        		center_node_found = center_node_and_mass[0]
        		boxes[center_node_found] = [center_node_found]
        		node_box_id[center_node_found] = center_node_found
        		non_center_nodes.remove(center_node_found)
        		center_nodes.add(center_node_found)
 
        		covered_nodes = covered_nodes.union(set(new_covered_nodes.keys()))
        		#print len(covered_nodes)
        		for i in new_covered_nodes:
        #			
        			try:
        				if central_distance_of_node[i] > new_covered_nodes[i]:
        					nodes_sorted_by_central_distance[central_distance_of_node[i]].remove(i)
        					if not nodes_sorted_by_central_distance[central_distance_of_node[i]]:
        						del nodes_sorted_by_central_distance[central_distance_of_node[i]]
        					try:
        						nodes_sorted_by_central_distance[new_covered_nodes[i]].append(i)
        					except KeyError:
        						nodes_sorted_by_central_distance[new_covered_nodes[i]] = [i]
        					central_distance_of_node[i] = new_covered_nodes[i]
        			except KeyError:
        				central_distance_of_node[i] = new_covered_nodes[i]
        				try:
        					nodes_sorted_by_central_distance[new_covered_nodes[i]].append(i)
        				except:
        					nodes_sorted_by_central_distance[new_covered_nodes[i]] = [i]
        #
        	max_distance = max(nodes_sorted_by_central_distance.keys())
        	for i in range(1,max_distance+1):
        		for j in nodes_sorted_by_central_distance[i]:
        			targets = list(set(adj[j].iterkeys()).intersection(set(nodes_sorted_by_central_distance[i-1])))
        			node_box_id[j] = node_box_id[random.choice(targets)]
        			boxes[node_box_id[j]].append(j)
        	boxes_subgraphs={}
        	for i in boxes:
        		boxes_subgraphs[i] = subgraph(G,boxes[i])
        #	
        	return boxes_subgraphs

function usage

    def alloRegressPlot(xdata,ydata,col,xlab,ylab):
        x=xdata;y=ydata;
        xx = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,xx).fit()
        constant=res.params[0];beta=res.params[1]; r2=res.rsquared
        plt.plot(xdata,ydata, "o",color=col,markersize=5)
        plt.plot(xdata,constant+xdata*beta,"red")
        plt.text(min(xdata)+(max(xdata)-min(xdata))/10,min(ydata)+(max(ydata)-min(ydata))/2,str(np.round(beta,2)))
        plt.xlabel(xlab)
        plt.ylabel(ylab)
    def fractalDimension(G):
        data = box_counting_dim(G,lmax=20,num_covers=1)
        alloRegressPlot(np.log(np.array(data.keys())),np.log(np.array(data.values())),"b","l","N")
 
    d = fractalDimension(G)
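
The three covering routines can also be called directly. A sketch, assuming an undirected copy of the network is used (in their original setting CBB, random_box_covering and MEMB operate on undirected graphs):

    UG = G.to_undirected()
    boxes_cbb  = CBB(UG, 3)                  # boxes of size lb = 3
    boxes_rand = random_box_covering(UG, 2)  # boxes of radius rb = 2
    boxes_memb = MEMB(UG, 2)                 # boxes of radius rb = 2
    print(len(boxes_memb))                   # number of boxes found by MEMB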

4 Random box by BEDWARDS

function copyright

    #------download address:networkx[dot]lanl[dot]gov/trac/ticket/359-----------------#
    """Return the fractal box counting dimension
    Returns the box counting dimension given by:
        "...the network is covered with Nb boxes, where all
        vertices in each of them are connected by a minimum
        distance smaller than lb. Nb and lb are found to be
        related by
            Nb proportional lb^-db
        where db is the fractal box dimension of the network."
    Note that Nb should be the minimal cover (ie inf(all Nb))
    however, [2] note that
        "...we find that minimization is not relevant and
        any covering gives rise to the same exponent."
    It is unclear in the paper whether this is only for networks
    that exhibit a scale free property, or for all networks
    so use with caution. We provide a value to calculate multiple
    random covers for each l. It will then take the minimum.
    Parameters
    ----------
    G          : graph
                 The graph used to compute the fractal cluster dimension.
    lmax       : Int
                 Max L to calculate Nb
    num_covers : int
                 Number of covers to calculate for each l
    Returns
    -------       
    Nb(l) : dict of ints
            The number of boxes need to cover the nodes given
            boxes of size l
    References
    ----------
    .. [1] L. da F. Costa, F.A. Rodrigues, G. Travieso, P.R. Villas Boas
           "Characterization of complex networks: A survey of
           measurements"
           Advances in Physics, Volume 56, Issue 1, Jan 2007
           pages 167-242
       [2] C. Song, S. Havlin and H.A. Makse, Nature 433 392 (2005).
    """

function definition

    def size_box_cover(G,l):
        Nbl = 0
        not_covered = set(G.nodes())
        while not (list(not_covered)==[]):
            n = random.choice(list(not_covered))
            sp = nx.single_source_shortest_path_length(G,n,cutoff=l)
            box_nodes = set(sp.keys())
            not_covered.difference_update(box_nodes)
            Nbl = Nbl + 1
        return Nbl
 
    def box_counting_dim(G,lmax=None,num_covers=1):
        if (lmax==None):
            lmax = nx.diameter(G)
        Nb = {}
        for l in range(1,lmax+1):
            Nblmin = G.order()
            for n in range(num_covers):
                Nbl = size_box_cover(G,l)
                if (Nbl <Nblmin):
                    Nblmin = Nbl
            Nb[l] = Nblmin
        return Nb
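
A direct call (a sketch; lmax=5 and num_covers=3 are arbitrary choices). The box dimension db is minus the slope of log Nb against log l:

    Nb = box_counting_dim(G.to_undirected(), lmax=5, num_covers=3)
    print(Nb)    # {l: number of boxes needed to cover the network}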

Calculate the cluster fractal dimension of networks

1. by BEDWARDS

function copyright

    #------download address:networkx[dot]lanl[dot]gov/trac/ticket/359-----------------#
    """Return the fractal cluster dimension
    Returns the cluster dimension for a network. Given by
        "...a seed vertex is chosen at random and a cluster
        is formed by vertices distance at most l from the
        seed. This process is repeated many times and the
        average mass of the resulting clusters is calculated
        as a function of l, resulting in the relation
        <Mc> proportional l^df
        where the average mass <Mc> is defined as the  number
        of vertices in the cluster and df is the fractal
        cluster dimension"[1]
    Parameters
    ----------
    G        : graph
               The graph used to compute the fractal cluster dimension.
    nsamples : int
               Number of random nsamples to use for each l
    lmax     : Int
               Largest l to calculate dimension for
    Returns
    -------       
    Mc(l) : dict of floats
            The average mass for each l given in a dictionary keyed
            by l.
    References
    ----------
    .. [1] L. da F. Costa, F.A. Rodrigues, G. Travieso, P.R. Villas Boas
           "Characterization of complex networks: A survey of
           measurements"
           Advances in Physics, Volume 56, Issue 1, Jan 2007
           pages 167-242
    """

function definition

    def average_cluster_mass(G,l,nsamples=10):
        try:
            nodes = random.sample(G.nodes(), nsamples)
        except ValueError:
            raise nx.NetworkXError("nsamples larger than number of nodes")
        mass = 0.0
        for n in nodes:
            sp = nx.single_source_shortest_path_length(G,n,cutoff=l)
            mass+= len(sp) - 1
        return float(mass)/len(nodes)
 
    def cluster_dim(G,nsamples=10,lmax=None):
        if (lmax==None):
            lmax = nx.diameter(G)
        Mc = {}
        for l in range(1,lmax+1):
            Mc[l] = average_cluster_mass(G,l,nsamples)
        return Mc
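
Usage sketch (the parameter values are arbitrary); the cluster dimension df is the slope of log<Mc> against log l, so the alloRegressPlot helper defined above can be reused:

    Mc = cluster_dim(G.to_undirected(), nsamples=5, lmax=4)
    alloRegressPlot(np.log(np.array(Mc.keys())), np.log(np.array(Mc.values())), "b", "l", "Mc")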

Consider the direction and weights of edges in network re-normalization

consider the direction of edges

    G = nx.DiGraph()
    # if you do not want to consider the direction of edges, use
    H = nx.Graph()    # or H = G.to_undirected()

consider weights

    # replace
    sp = nx.single_source_shortest_path_length(G,n,cutoff=l)
    # with
    sp = nx.single_source_dijkstra_path_length(G,source=n,cutoff=l,weight='weight')

The change of basic flow network quantities in re-normalization

import libraries

    import statsmodels.api as sm

function definition

    def renormalizeMapping(G,l):
        replaceDic={}
        not_covered = set(G.nodes())
        while not (list(not_covered)==[]):
            n = random.choice(list(not_covered))
            sp = nx.single_source_shortest_path_length(G,n,cutoff=l)
            #sp = nx.single_source_dijkstra_path_length(G,source=n,cutoff=l,weight='weight')
            box_nodes = set(sp.keys())
            for i in list(box_nodes):
                replaceDic[i]=n
            not_covered.difference_update(box_nodes)
        return replaceDic

calculate quantities of interest in each step of re-normalization

 
    def renormalizeStat(G,l):
        mapping = renormalizeMapping(G,l)
        G = nx.relabel_nodes(G,mapping)
        G.remove_edges_from(G.selfloop_edges())
        n = nx.number_of_nodes(G)
        m = nx.number_of_edges(G)
        w = G.size(weight='weight')
        return [n,m,w]
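
Usage sketch, tracking how the node number, edge number and total flow shrink as the box size l grows (the range of l is an arbitrary choice):

    for l in range(1, 4):
        print(renormalizeStat(G, l))    # [n, m, w] after one renormalization step with box size l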

The re-normalization of flow networks containing several components

    def renormalizeStat(G,l):
        if "sink" in G.nodes():
            G.remove_node("sink")
        g = nx.weakly_connected_component_subgraphs(G)[0]
        mapping = renormalizeMapping(g,l)
        g = nx.relabel_nodes(g,mapping)
        g.remove_edges_from(g.selfloop_edges())
        n = nx.number_of_nodes(g)
        m = nx.number_of_edges(g)
        h = flowBalancing(g)
        uv = outflow(h,"source")
        pv = h.size(weight='weight')-uv
        return [n,m,uv,pv]

The growth network models

Fractal-Small world networks by Song et al.

    def fractal_model(generation,m,x,e):
        """
        Returns the fractal model introduced by
        Song, Havlin, Makse in Nature Physics 2, 275.
        generation = number of generations
        m = number of offspring per node
        x = number of connections between offsprings
        e = probability that hubs stay connected
        1-e = probability that x offsprings connect.
        If e=1 we are in MODE 1 (pure small-world).
        If e=0 we are in MODE 2 (pure fractal).
        """
        G=Graph()
        G.add_edge(0,1) #This is the seed for the network (generation 0)
        node_index = 2
        for n in range(1,generation+1):
            all_links = G.edges()
            while all_links:
                link = all_links.pop()
                new_nodes_a = range(node_index,node_index + m)
                #random.shuffle(new_nodes_a)
                node_index += m
                new_nodes_b = range(node_index,node_index + m)
                #random.shuffle(new_nodes_b)
                node_index += m
                G.add_edges_from([(link[0],node) for node in new_nodes_a])
                G.add_edges_from([(link[1],node) for node in new_nodes_b])
                repulsive_links = zip(new_nodes_a,new_nodes_b)
                G.add_edges_from([repulsive_links.pop() for i in range(x-1)])
                if random.random() > e:
                    G.remove_edge(*link)
                    G.add_edge(*repulsive_links.pop())
        return G
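
function usage

A sketch; the parameter values are only illustrative.

    G_sw = fractal_model(generation=3, m=2, x=2, e=1)   # MODE 1: pure small-world
    G_fr = fractal_model(generation=3, m=2, x=2, e=0)   # MODE 2: pure fractal
    print(G_sw.number_of_nodes())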

Data analysis tools

Pearson correlation

import libraries

    from scipy.stats.stats import pearsonr

function usage

    pearsonr(x, y)

Uni-variate ordinary least square regression

import libraries

    import statsmodels.api as sm

fit parameters

    def OLSRegressFit(x,y):
        xx = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,xx).fit()
        constant, beta = res.params
        r2 = res.rsquared
        return [constant, beta, r2]
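
A minimal check with synthetic data (the noise level is arbitrary):

    x = np.arange(1, 101)
    y = 2 + 3*x + np.random.normal(0, 1, 100)
    print(OLSRegressFit(x, y))    # roughly [2, 3, r2 close to 1]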

plot data points and the regression line

    def OLSRegressPlot(x,y,col,xlab,ylab):
        xx = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,xx).fit()
        constant, beta = res.params
        r2 = res.rsquared
        plt.plot(x,y, "o",color=col,markersize=5)
        plt.plot(x,constant+x*beta,"red")
        plt.text(min(x)+(max(x)-min(x))/10,min(y)+(max(y)-min(y))/2,str(np.round(beta,2)))
        plt.xlabel(xlab)
        plt.ylabel(ylab)

Uni-variate orthogonal regression

import libraries

    import scipy.odr

fit parameters

    def orthogonalRegressFit(x,y):
        def f(p, x):
            return p[0]+p[1]*x
        myModel = scipy.odr.Model(f)
        myData = scipy.odr.Data(x, y)
        myOdr = scipy.odr.ODR(myData, myModel , beta0=[1,1])
        myOdr.set_job(fit_type=0)
        out = myOdr.run()
        constant, beta = out.beta
        SSE = sum((constant + x*beta - y)**2)
        SST = SSE + sum((y-np.mean(y))**2)
        r2 = 1-SSE/SST
        return [constant, beta, r2]

plot data points and the regression line

    def orthogonalRegressPlot(x, y, col, xlab, ylab):
        def f(p, x):
            return p[0] + p[1]*x
        myModel = scipy.odr.Model(f)
        myData = scipy.odr.Data(x, y)
        myOdr = scipy.odr.ODR(myData, myModel , beta0=[1,1])
        myOdr.set_job(fit_type=0) 
        out = myOdr.run()
        constant, beta = out.beta
        plt.plot(x, y, "o", color=col, markersize=5)
        plt.plot(x,constant + x * beta,"red")
        plt.text(min(x)+(max(x)-min(x))/10,min(y)+(max(y)-min(y))/2,str(np.round(beta,2)))
        plt.xlabel(xlab)
        plt.ylabel(ylab)

Multi-variate ordinary least square regression

import libraries

    import statsmodels.api as sm

fit parameters

    def OLSMRegressFit(x1, x2, y):
        x = np.column_stack((x1,x2))
        x = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,x).fit()
        constant, beta1, beta2 = res.params
        return [constant, beta1, beta2]

log-binned data

    def binData(x, y, binScale):                                                                       # 0 < binScale < 1
        data = np.transpose(np.vstack((np.array(x),np.array(y))))
        data = np.log (data + 1)
        rs = np.arange(floor(min(data[:,0])),ceil(max(data[:,0])),binScale)
        rs = np.transpose(np.vstack((rs[:-1],rs[1:])))
        ps=[]
        def binf(rang):
            try:
                d1 = data[data[:,0] >= rang[0]]
                d2 = d1[d1[:,0] < rang[1]]
                if np.mean(d2[:,0])>0 and np.mean(d2[:,1]) >0:
                    ps.append( [np.mean(d2[:,0]),np.mean(d2[:,1])])
            except:
                pass
        map(binf,rs)
        ps=np.array(ps)
        return ps
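
Usage sketch, assuming x and y are two positive-valued arrays (e.g. node flows); the returned ps holds the log-binned means, ready to plot or to fit:

    ps = binData(x, y, 0.2)
    plt.plot(ps[:,0], ps[:,1], "ro")    # binned means in log-log space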

Fit the Discrete Generalized Beta Distribution

import libraries

    import statsmodels.api as sm

fit parameters

    def DGBDfFit(data):
        t = np.array(sorted(data,key=lambda x:-x))
        r = np.array(range(1,len(data)+1))
        y = np.log(t)
        x1 = np.log(max(r)+1-r)
        x2 = np.log(r)
        x = np.column_stack((x1,x2))
        x = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,x).fit()
        [A,b,a] = res.params
        return [A,b,a]
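
Usage sketch on synthetic rank-size data (the sample is only illustrative):

    data = np.random.lognormal(0, 1, 1000)
    A, b, a = DGBDfFit(data)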

plot data points and the fitted curve

    def DGBDPlot(data):
        t = np.array(sorted(data,key=lambda x:-x))
        r = np.array(range(1,len(data)+1))
        y = np.log(t)
        x1 = np.log(max(r)+1-r)
        x2 = np.log(r)
        x = np.column_stack((x1,x2))
        x = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,x).fit()
        [A,b,a] = res.params
        plt.plot(r,t,"o",color="b")
        plt.plot(r,np.exp(A)*(max(r)+1-r)**b*r**a,"r-")
        plt.yscale('log')
        plt.text(max(r)/2,max(t)/100,"b=" + str(round(b,2)) + ", a=" + str(round(a,2)))

Fit the Power law distribution with exponential cut-off

import libraries

    import statsmodels.api as sm
    import random

fit parameters

    def powerLawExponentialCutOffFit(data):
        t = np.array(sorted(data,key=lambda x:-x))
        r = np.array(range(len(data))) +1
        r = r/float(np.max(r))
        y = np.log(r)
        x1 = np.log(t)
        x2 = t
        x = np.column_stack((x1,x2))
        x = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,x).fit()
        L,alpha,lam = res.params
        return [L,alpha,lam]

plot data points and the fitted curve

    def powerLawExponentialCutOffPlot(data):
        t = np.array(sorted(data,key=lambda x:-x))
        r = np.array(range(len(data))) +1
        r = r/float(np.max(r))
        y = np.log(r)
        x1 = np.log(t)
        x2 = t
        x = np.column_stack((x1,x2))
        x = sm.add_constant(x, prepend=True)
        res = sm.OLS(y,x).fit()
        L,alpha,lam = res.params
        plt.plot(t,r,".",color="SteelBlue",alpha=0.75,markersize=3)
        plt.plot(t,np.exp(L) * t ** alpha * np.exp(lam * t),"r-")
        plt.xscale('log'); plt.yscale('log')
        plt.xlabel(r'value ')
        plt.ylabel(r'cumulative probability')
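
Usage sketch (exponential toy data, so the fitted power-law exponent stays close to zero and the cutoff term dominates):

    data = np.random.exponential(10, 1000)
    L0, alpha, lam = powerLawExponentialCutOffFit(data)
    powerLawExponentialCutOffPlot(data)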

Tricks of Python programming

Import file separated by "\t" symbol

    data = []
    f = open('E:/xxx','rb')
    for line in f.readlines():
        line = line.strip().split("\t")
        # add a filter such as: if line[0] == 1
        data.append(line)
    f.close()

Export file separated by "\t" symbol

    f = open("E:/xxx", "wb")
    for i in data:
        f.write("\t".join(map(str,i)) + "\n")
    f.close()

Select rows with all elements greater than 0 in an array

    d = d[d.all(1)]

Encoding/decoding

    s = s.decode('gb2312').encode('utf-8')

Obtain a list of consecutive days

import libraries

    import datetime

function definition

    def getDays(year1, month1, day1, year2, month2, day2):
        dt = datetime.datetime(year1, month1, day1)
        end = datetime.datetime(year2, month2, day2)
        step = datetime.timedelta(days=1)
        days = []
        while dt < end:
            days.append(dt.strftime('%Y%m%d'))
            dt += step
        return days

function usage

    days = getDays(2013, 2, 27, 2013, 4, 27)

plotting histogram

function definition

    def histogram (data,intervals,color,xlab):
        weights = np.ones_like(data)/len(data)
        plt.hist(data, intervals, facecolor = color, weights=weights, alpha = 0.75)
        plt.xlabel(xlab)

function usage

    histogram (x,30,'green',r'$\theta$')

plot sub-figures

    plt.figure(num=None, figsize=(12, 4), dpi=80, facecolor='w', edgecolor='k')
    plt.subplot(131)                                                             
    plt.plot(1,1,"bo")
    plt.subplot(132)
    plt.plot(1,1,"ro")
    plt.subplot(133)
    plt.plot(1,1,"mo")
    plt.tight_layout(pad=0.4, w_pad=0, h_pad=1.0)

Check whether all characters in a string "s" are numeric

    s.isdigit()