Notebook 14 – Math 2121, Fall 2020

In this notebook we will explore an application of eigenvalues and eigenvectors to drawing graphs (in the sense of discrete mathematics, rather than calculus) and to ranking the importance of nodes in a network (specifically, the MTR system in Hong Kong).

8.2 μs

xxxxxxxxxx
 
using LinearAlgebra

433 μs

xxxxxxxxxx
 
using Plots

10.7 s

A (simple, undirected) graph is a set of vertices $V = {v_{1}, v_{2}, \dots, v_{n}}$ along with a set of edges $v_{i} ⟷ v_{j}$ between distinct vertices. We can draw a graph by plotting points that correspond to the vertices and drawing lines between vertices that are connected by an edge.

8.3 μs

plot_graph (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph(edges, index, xy)
    # helper method for plotting graphs, which uses the columns of xy
    # (which should be a 2-by-n matrix where n = number of vertices in graph) 
    # as the vertex locations
    
    p = plot(grid=nothing,  axis=nothing, foreground_color_subplot="white",         
             aspect_ratio=:equal)
    for (v, adjacent) in edges
        if haskey(index, v)
            i = index[v]
            x, y = xy[1, i], xy[2, i]
            for w in adjacent
                if haskey(index, w)
                    j = index[w]
                    dx, dy = xy[1, j] - x, xy[2, j] - y
                    quiver!(quiver = ([dx],[dy]), [x], [y], color=:blue)
                end
            end
        end
    end
    scatter!(xy[1,:], xy[2,:], markersize = 5, legend=false)
    return p
end

57.1 μs

plot_graph_random (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph_random(edges, index)
    # this method just passes a random 2-row vector xy to the plot_graph function
    xy = rand(2, length(index))
    plot_graph(edges, index, xy)
end

22.1 μs

graph_index (generic function with 1 method)

xxxxxxxxxx
 
function graph_index(edges)
    vertices = [v for (v, k)=edges]
    degrees = [(-length(k), v) for (v, k)=edges]
    perm = sortperm(degrees)
    return Dict([degrees[perm[i]][2] => i for i=1:length(perm)])
end

58.7 μs

For example, here is a graph with 6 vertices and 7 edges:

2.7 μs

xxxxxxxxxx
 
begin
    example_edges = Dict(
        1 => [2, 5], 
        2 => [1, 3, 5], 
        3 => [2, 4], 
        4 => [3, 5, 6], 
        5 => [1, 2, 4], 
        6 => [4])
    example_index = Dict([i => i for i=1:6])
    
    plot_graph_random(example_edges, example_index)
end

6.7 s

The adjacency matrix of a graph with vertices $V = {v_{1}, v_{2}, \dots, v_{n}}$ is the $n \times n$ matrix that has $1$ in position $(i, j)$ if $v_{i} ⟷ v_{j}$ is an edge and $0$ in position $(i, j)$ if $v_{i} ⟷ v_{j}$ is not an edge.

The degree of a vertex $v_{i}$ is the number of edges that involve $v_{i}$ .

This is the sum of the entries in column $i$ (or row $i$ ) of the adjacency matrix.

9.0 μs

graph_adjacency_matrix (generic function with 1 method)

xxxxxxxxxx
 
function graph_adjacency_matrix(edges, index)
    n = length(index)
    A = zeros(Int64, n, n)
    for (v, adjacent) in edges
        if haskey(index, v)
            for w in adjacent
                if haskey(index, w)
                    i = index[v]
                    j = index[w]
                    A[i, j] = 1
                end
            end
        end
    end
    return A
end

71.8 μs

6×6 Array{Int64,2}:
 0  1  0  0  1  0
 1  0  1  0  1  0
 0  1  0  1  0  0
 0  0  1  0  1  1
 1  1  0  1  0  0
 0  0  0  1  0  0

xxxxxxxxxx
 
A = graph_adjacency_matrix(example_edges, example_index)

30.8 ms

Dict

Int641

xxxxxxxxxx
 
example_edges

961 ns

The Laplacian matrix of a graph is the matrix $L = D - A$ where $A$ is the adjacency matrix and $D$ is the diagonal matrix whose entry in position $(i, i)$ is the degree of vertex $v_{i}$ .

10.9 μs

graph_laplacian_matrix (generic function with 1 method)

xxxxxxxxxx
 
function graph_laplacian_matrix(edges, index)
    A = graph_adjacency_matrix(edges, index)
    D = Diagonal(A * ones(Int64, length(index)))
    return D - A
end

28.1 μs

6×6 Array{Int64,2}:
  2  -1   0   0  -1   0
 -1   3  -1   0  -1   0
  0  -1   2  -1   0   0
  0   0  -1   3  -1  -1
 -1  -1   0  -1   3   0
  0   0   0  -1   0   1

xxxxxxxxxx
 
L = graph_laplacian_matrix(example_edges, example_index)

314 ms

Some interesting facts:

The eigenvalues of the adjacency matrix $A$ are always all real.
The eigenvalues of the Laplacian matrix $L$ are all real and nonnegative.
The matrix $L$ always has $0$ as an eigenvalue.
The multiplicity of the $0$ -eigenvalue of $L$ is the number of connected components of the graph.

5.6 μs

Float641

-2.13639

-1.20608

-0.540632

0.261144

1.08247

2.53948

xxxxxxxxxx
 
eigen(A).values

877 ms

Float641

0.0

0.721586

1.68257

3.0

3.70462

4.89122

xxxxxxxxxx
 
# the first eigenvalue is zero, but looks negative due to floating point limitations
eigen(L).values

41.4 μs

random_graph (generic function with 1 method)

xxxxxxxxxx
 
function random_graph(n, p)
    # creates a random graph with n vertices
    edges = Dict(i => [] for i=1:n)
    for i=1:n
        for j=i+1:n
            if rand() < p
                append!(edges[i], j)
                append!(edges[j], i)
            end
        end
    end
    return edges, graph_index(edges)
end

51.1 μs

connected_components (generic function with 1 method)

xxxxxxxxxx
 
function connected_components(edges, index)
    # computes connected components using graph Laplacian matrix
    L = graph_laplacian_matrix(edges, index)
    ans = 0
    for v=eigen(L).values
        if abs(v) < 1e-14
            ans += 1
        end
    end
    return ans
end

37.5 μs

xxxxxxxxxx
 
begin
    redges, rindex = random_graph(40, 0.07)
    plot_graph_random(redges, rindex)
end

622 ms

xxxxxxxxxx
 
connected_components(redges, rindex)

59.8 ms

Here is an interesting algorithm for drawing a complicated graph:

Compute the adjacency matrix $A$ or Laplacian matrix $L$ .
Take two distinct eigenvectors $u$ and $v$ for this matrix.
Rescale these eigenvectors to have the same length.
Use the coordinates $(u_{1}, v_{1})$ , $(u_{2}, v_{2})$ , $\dots$ , $(u_{n}, v_{n})$ for the vertices.
Often what works well is to use the two eigenvectors for $L$ with smallest nonzero eigenvalue.
Another choice is the use the two eigenvectors for $L$ with the largest eigenvalues.

5.0 μs

path_graph (generic function with 1 method)

xxxxxxxxxx
 
function path_graph(n)
    # creates path graph with n vertices
    index = Dict(i => i for i=1:n)
    edges = Dict(i => (i == n ? [n - 1] : i == 1 ? [2] : [i - 1, i + 1]) for i=1:n)
    return edges, index
end

54.0 μs

circle_graph (generic function with 1 method)

xxxxxxxxxx
 
function circle_graph(n)
    # creates circle graph with n vertices
    index = Dict(i => i for i=1:n)
    edges = Dict(i => [i == n ? 1 : i + 1, i == 1 ? n : i - 1] for i=1:n)
    return edges, index
end

49.6 μs

complete_graph (generic function with 1 method)

xxxxxxxxxx
 
function complete_graph(n)
    # creates complete graph with n vertices
    index = Dict(i => i for i=1:n)
    edges = Dict(i => [j for j=1:n if i != j] for i=1:n)
    return edges, index
end

80.8 μs

connected_random_graph (generic function with 2 methods)

xxxxxxxxxx
 
function connected_random_graph(n, p, trials=1000)
    # attempts to create a random graph with one connected component
    for iter=1:trials
        e, i = random_graph(n, p)
        if connected_components(e, i) == 1
            return e, i
        end
    end
    return random_graph(n, p)
end

42.6 μs

The following method creates the graph representing the station network for Hong Kong's MTR system, as it existed for a given input year.

2.6 μs

mtr_graph (generic function with 2 methods)

xxxxxxxxxx
 
begin
    function reverse_index(index)
        return Dict(v => k for (k, v) in index)
    end 
    
    function get_year(s)
        baseline = -2208988800 # this the UTC time for 1 Jan 1900 in seconds
        year = 365 * 24 * 60 * 60 # seconds in a year
        return 1900 + (s - baseline) / year
    end
    
    function subgraph(index, vertices)
        ans = Dict()
        rindex = reverse_index(index)
        c = 1
        for i=1:length(rindex)
            v = rindex[i]
            if v in vertices
                ans[v] = c
                c += 1
            end
        end
        return ans
    end
    
    function restrict_before_year(edges, dates, year)
        return Dict([
            v => [u for u=e if get_year(dates[u]) <= year   ]
            for (v, e) = edges if get_year(dates[v]) <= year
        ])
    end 
​
    function mtr_graph(year=2020)
    
    edges = Dict(["East Tsim Sha Tsui" => ["Austin", "Hung Hom", "Tsim Sha Tsui"], "Tsim Sha Tsui" => ["East Tsim Sha Tsui", "Jordan", "Admiralty"], "Central" => ["Hong Kong", "Sheung Wan", "Admiralty"], "Hong Kong" => ["Central", "Kowloon"], "Lo Wu" => ["Sheung Shui"], "Lok Ma Chau" => ["Sheung Shui"], "Sheung Shui" => ["Lo Wu", "Lok Ma Chau", "Fanling"], "Fanling" => ["Tai Wo", "Sheung Shui"], "Tai Wo" => ["Tai Po Market", "Fanling"], "Tai Po Market" => ["University", "Tai Wo"], "University" => ["Tai Po Market", "Racecourse", "Fo Tan"], "Racecourse" => ["University", "Sha Tin"], "Fo Tan" => ["University", "Sha Tin"], "Sha Tin" => ["Fo Tan", "Tai Wai", "Racecourse"], "Tai Wai" => ["Kowloon Tong", "Sha Tin", "Hin Keng", "Che Kung Temple"], "Kowloon Tong" => ["Mong Kok East", "Lok Fu", "Shek Kip Mei", "Tai Wai"], "Mong Kok East" => ["Kowloon Tong", "Hung Hom"], "Hung Hom" => ["East Tsim Sha Tsui", "Mong Kok East"], "Whampoa" => ["Ho Man Tin"], "Ho Man Tin" => ["Whampoa", "Yau Ma Tei"], "Yau Ma Tei" => ["Ho Man Tin", "Jordan", "Mong Kok"], "Mong Kok" => ["Prince Edward", "Yau Ma Tei"], "Prince Edward" => ["Sham Shui Po", "Shek Kip Mei", "Mong Kok"], "Shek Kip Mei" => ["Kowloon Tong", "Prince Edward"], "Lok Fu" => ["Kowloon Tong", "Wong Tai Sin"], "Wong Tai Sin" => ["Lok Fu", "Diamond Hill"], "Diamond Hill" => ["Kai Tak", "Hin Keng", "Wong Tai Sin", "Choi Hung"], "Choi Hung" => ["Kowloon Bay", "Diamond Hill"], "Kowloon Bay" => ["Ngau Tau Kok", "Choi Hung"], "Ngau Tau Kok" => ["Kowloon Bay", "Kwun Tong"], "Kwun Tong" => ["Ngau Tau Kok", "Lam Tin"], "Lam Tin" => ["Kwun Tong", "Yau Tong"], "Yau Tong" => ["Tiu Keng Leng", "Quarry Bay", "Lam Tin"], "Tiu Keng Leng" => ["Yau Tong", "Tseung Kwan O"], "Tsuen Wan" => ["Tai Wo Hau"], "Tai Wo Hau" => ["Kwai Hing", "Tsuen Wan"], "Kwai Hing" => ["Kwai Fong", "Tai Wo Hau"], "Kwai Fong" => ["Kwai Hing", "Lai King"], "Lai King" => ["Mei Foo", "Nam Cheong", "Kwai Fong", "Tsing Yi"], "Mei Foo" => ["Tsuen Wan West", "Lai Chi Kok", "Lai King", "Nam Cheong"], "Lai Chi Kok" => ["Mei Foo", "Cheung Sha Wan"], "Cheung Sha Wan" => ["Lai Chi Kok", "Sham Shui Po"], "Sham Shui Po" => ["Prince Edward", "Cheung Sha Wan"], "Jordan" => ["Tsim Sha Tsui", "Yau Ma Tei"], "Admiralty" => ["Central", "Ocean Park", "Wan Chai", "Tsim Sha Tsui"], "Kennedy Town" => ["HKU"], "HKU" => ["Kennedy Town", "Sai Ying Pun"], "Sai Ying Pun" => ["Sheung Wan", "HKU"], "Sheung Wan" => ["Central", "Sai Ying Pun"], "Wan Chai" => ["Causeway Bay", "Admiralty"], "Causeway Bay" => ["Tin Hau", "Wan Chai"], "Tin Hau" => ["Causeway Bay", "Fortress Hill"], "Fortress Hill" => ["Tin Hau", "North Point"], "North Point" => ["Fortress Hill", "Quarry Bay"], "Quarry Bay" => ["North Point", "Yau Tong", "Tai Koo"], "Tai Koo" => ["Quarry Bay", "Sai Wan Ho"], "Sai Wan Ho" => ["Tai Koo", "Shau Kei Wan"], "Shau Kei Wan" => ["Heng Fa Chuen", "Sai Wan Ho"], "Heng Fa Chuen" => ["Shau Kei Wan", "Chai Wan"], "Chai Wan" => ["Heng Fa Chuen"], "Tung Chung" => ["Sunny Bay"], "Sunny Bay" => ["Tung Chung", "Disneyland Resort", "Tsing Yi"], "Tsing Yi" => ["Airport", "Lai King", "Sunny Bay", "Kowloon"], "Nam Cheong" => ["Olympic", "Lai King", "Austin", "Mei Foo"], "Olympic" => ["Nam Cheong", "Kowloon"], "Kowloon" => ["Olympic", "Hong Kong", "Tsing Yi"], "AsiaWorld–Expo" => ["Airport"], "Airport" => ["AsiaWorld–Expo", "Tsing Yi"], "Po Lam" => ["Hang Hau"], "Hang Hau" => ["Po Lam", "Tseung Kwan O"], "LOHAS Park" => ["Tseung Kwan O"], "Tseung Kwan O" => ["Tiu Keng Leng", "LOHAS Park", "Hang Hau"], "Tuen Mun" => ["Siu Hong"], "Siu Hong" => ["Tin Shui Wai", "Tuen Mun"], "Tin Shui Wai" => ["Long Ping", "Siu Hong"], "Long Ping" => ["Tin Shui Wai", "Yuen Long"], "Yuen Long" => ["Kam Sheung Road", "Long Ping"], "Kam Sheung Road" => ["Tsuen Wan West", "Yuen Long"], "Tsuen Wan West" => ["Kam Sheung Road", "Mei Foo"], "Austin" => ["East Tsim Sha Tsui", "Nam Cheong"], "Wu Kai Sha" => ["Ma On Shan"], "Ma On Shan" => ["Heng On", "Wu Kai Sha"], "Heng On" => ["Tai Shui Hang", "Ma On Shan"], "Tai Shui Hang" => ["Heng On", "Shek Mun"], "Shek Mun" => ["Tai Shui Hang", "City One"], "City One" => ["Shek Mun", "Sha Tin Wai"], "Sha Tin Wai" => ["Che Kung Temple", "City One"], "Che Kung Temple" => ["Tai Wai", "Sha Tin Wai"], "Hin Keng" => ["Diamond Hill", "Tai Wai"], "Kai Tak" => ["Diamond Hill"], "Disneyland Resort" => ["Sunny Bay"], "South Horizons" => ["Lei Tung"], "Lei Tung" => ["Wong Chuk Hang", "South Horizons"], "Wong Chuk Hang" => ["Ocean Park", "Lei Tung"], "Ocean Park" => ["Wong Chuk Hang", "Admiralty"]])
    
    dates = Dict(["Lo Wu" => -637977600, "Lok Ma Chau" => 1187136000, "Sheung Shui" => -1250640000, "Fanling" => -1869868800, "Tai Wo" => 610675200, "Tai Po Market" => 418521600, "University" => -418780800, "Racecourse" => 276566400, "Fo Tan" => 477273600, "Sha Tin" => -1869868800, "Tai Wai" => 429753600, "Kowloon Tong" => 307584000, "Mong Kok East" => -1869868800, "Hung Hom" => 186537600, "Whampoa" => 1477180800, "Ho Man Tin" => 1477180800, "Yau Ma Tei" => 315446400, "Mong Kok" => 315446400, "Prince Edward" => 389836800, "Shek Kip Mei" => 307584000, "Lok Fu" => 307584000, "Wong Tai Sin" => 307584000, "Diamond Hill" => 307584000, "Choi Hung" => 307584000, "Kowloon Bay" => 307584000, "Ngau Tau Kok" => 307584000, "Kwun Tong" => 307584000, "Lam Tin" => 623203200, "Yau Tong" => 1028419200, "Tiu Keng Leng" => 1029628800, "Tsuen Wan" => 389836800, "Tai Wo Hau" => 389836800, "Kwai Hing" => 389836800, "Kwai Fong" => 389836800, "Lai King" => 389836800, "Mei Foo" => 390441600, "Lai Chi Kok" => 390441600, "Cheung Sha Wan" => 390441600, "Sham Shui Po" => 390441600, "Jordan" => 315446400, "Tsim Sha Tsui" => 315446400, "Admiralty" => 319161600, "Central" => 319161600, "Kennedy Town" => 1419724800, "HKU" => 1419724800, "Sai Ying Pun" => 1427587200, "Sheung Wan" => 517190400, "Wan Chai" => 486345600, "Causeway Bay" => 486345600, "Tin Hau" => 486345600, "Fortress Hill" => 486345600, "North Point" => 486345600, "Quarry Bay" => 486345600, "Tai Koo" => 486345600, "Sai Wan Ho" => 486345600, "Shau Kei Wan" => 486345600, "Heng Fa Chuen" => 486345600, "Chai Wan" => 486345600, "Tung Chung" => 898387200, "Sunny Bay" => 1117584000, "Tsing Yi" => 898387200, "Nam Cheong" => 1071878400, "Olympic" => 898387200, "Kowloon" => 898387200, "Hong Kong" => 898387200, "AsiaWorld–Expo" => 1135036800, "Airport" => 899683200, "Po Lam" => 1029628800, "Hang Hau" => 1029628800, "LOHAS Park" => 1248566400, "Tseung Kwan O" => 1029628800, "Tuen Mun" => 1071878400, "Siu Hong" => 1071878400, "Tin Shui Wai" => 1071878400, "Long Ping" => 1071878400, "Yuen Long" => 1071878400, "Kam Sheung Road" => 1071878400, "Tsuen Wan West" => 1071878400, "Austin" => 1250380800, "East Tsim Sha Tsui" => 1098576000, "Wu Kai Sha" => 1103587200, "Ma On Shan" => 1103587200, "Heng On" => 1103587200, "Tai Shui Hang" => 1103587200, "Shek Mun" => 1103587200, "City One" => 1103587200, "Sha Tin Wai" => 1103587200, "Che Kung Temple" => 1103587200, "Hin Keng" => 1581638400, "Kai Tak" => 1581638400, "Disneyland Resort" => 1122854400, "South Horizons" => 1482883200, "Lei Tung" => 1482883200, "Wong Chuk Hang" => 1482883200, "Ocean Park" => 1482883200])
    
    edges = restrict_before_year(edges, dates, year + 1)
    index = graph_index(edges)
    
    return edges, index
​
    end
    
end

263 μs

plot_graph_adjacency_low (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph_adjacency_low(edges, index)
    # plots graph using lowest eigenvectors of adjacency matrix as xy-coordinates
    n = length(index)
    A = graph_adjacency_matrix(edges, index)
    xy = transpose(eigen(A).vectors[:,1:2])
    plot_graph(edges, index, xy)
end

28.5 μs

plot_graph_adjacency_high (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph_adjacency_high(edges, index)
    # plots graph using highest eigenvectors of adjacency matrix as xy-coordinates
    n = length(index)
    A = graph_adjacency_matrix(edges, index)
    xy = transpose(eigen(A).vectors[:,end - 1:end])
    plot_graph(edges, index, xy)
end

30.8 μs

plot_graph_laplacian_low (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph_laplacian_low(edges, index)
    # plots graph using lowest nonzero eigenvectors of Laplacian as xy-coordinates
    n = length(index)
    A = graph_laplacian_matrix(edges, index)
    
    c = 2
    eig = eigen(A).values
    while c + 1 < n && abs(eig[c]) < 10e-14
        c += 1
    end
    
    xy = transpose(eigen(A).vectors[:, c:c+1])
    plot_graph(edges, index, xy)
end

91.1 μs

plot_graph_laplacian_high (generic function with 1 method)

xxxxxxxxxx
 
function plot_graph_laplacian_high(edges, index)
    # plots graph using highest eigenvectors of Laplacian as xy-coordinates
    n = length(index)
    A = graph_laplacian_matrix(edges, index)
    xy = transpose(eigen(A).vectors[:, end - 1:end])
    plot_graph(edges, index, xy)
end

42.5 μs

get_adjacency_plots (generic function with 1 method)

xxxxxxxxxx
 
function get_adjacency_plots(e, i)
    plot(plot_graph_adjacency_low(e, i),
         plot_graph_adjacency_high(e, i))
end

21.1 μs

get_laplacian_plots (generic function with 1 method)

xxxxxxxxxx
 
function get_laplacian_plots(e, i)
    plot(plot_graph_laplacian_low(e, i),
         plot_graph_laplacian_high(e, i))
end

24.3 μs

Here is our starting graph from the example above, plotted randomly:

2.5 μs

xxxxxxxxxx
 
begin
    test_edges = example_edges
    test_index = example_index
    
    plot_graph_random(test_edges, test_index)
end

10.9 ms

Here is the same graph, plotted using eigenvector methods based on the adjacency matrix:

2.4 μs

xxxxxxxxxx
 
get_adjacency_plots(test_edges, test_index)

923 ms

Here is the same graph, plotted using eigenvector methods based on the Laplacian matrix:

2.4 μs

xxxxxxxxxx
 
get_laplacian_plots(test_edges, test_index)

50.2 ms

In my opinion the plot using the eigenvectors of the Laplacian for the lowest nonzero eigenvalues as xy-coordinates gives the 'best' output.

Here are a few more examples to underscore this claim:

3.3 μs

xxxxxxxxxx
 
begin
    path_edges, path_index = path_graph(40)
    plot_graph_random(path_edges, path_index)
end

157 ms

xxxxxxxxxx
 
get_adjacency_plots(path_edges, path_index)

157 ms

xxxxxxxxxx
 
get_laplacian_plots(path_edges, path_index)

169 ms

xxxxxxxxxx
 
begin
    circle_edges, circle_index = circle_graph(30)
    plot_graph_random(circle_edges, circle_index)
end

129 ms

xxxxxxxxxx
 
get_adjacency_plots(circle_edges, circle_index)

140 ms

xxxxxxxxxx
 
get_laplacian_plots(circle_edges, circle_index)

107 ms

xxxxxxxxxx
 
begin
    complete_edges, complete_index = complete_graph(9)
    plot_graph_random(complete_edges, complete_index)
end

159 ms

xxxxxxxxxx
 
get_adjacency_plots(complete_edges, complete_index)

128 ms

xxxxxxxxxx
 
get_laplacian_plots(complete_edges, complete_index)

101 ms

Remark: our eigenvector methods do not give such great pictures of the complete graph on $n$ vertices.

6.2 μs

xxxxxxxxxx
 
begin
    connected_edges, connected_index = connected_random_graph(10, 0.1, 10000)
    plot_graph_random(connected_edges, connected_index)
end

81.1 ms

xxxxxxxxxx
 
get_adjacency_plots(connected_edges, connected_index)

161 ms

xxxxxxxxxx
 
get_laplacian_plots(connected_edges, connected_index)

40.6 ms

1Dict

"Kwai Fong"

String1

"Kwai Hing"

"Lai King"

"Kwun Tong"

String1

"Ngau Tau Kok"

"Lam Tin"

"Whampoa"

String1

"Ho Man Tin"

"Ngau Tau Kok"

String1

"Kowloon Bay"

"Kwun Tong"

"Mei Foo"

String1

"Tsuen Wan West"

"Lai Chi Kok"

"Lai King"

"Nam Cheong"

"Long Ping"

String1

"Tin Shui Wai"

"Yuen Long"

"Tung Chung"

String1

"Sunny Bay"

"Chai Wan"

String1

"Heng Fa Chuen"

"Austin"

String1

"East Tsim Sha Tsui"

"Nam Cheong"

"Tai Wai"

String1

"Kowloon Tong"

"Sha Tin"

"Hin Keng"

"Che Kung Temple"

"Heng On"

String1

"Tai Shui Hang"

"Ma On Shan"

"Tuen Mun"

String1

"Siu Hong"

"Central"

String1

"Hong Kong"

"Sheung Wan"

"Admiralty"

"Shek Kip Mei"

String1

"Kowloon Tong"

"Prince Edward"

"Kwai Hing"

String1

"Kwai Fong"

"Tai Wo Hau"

"Sai Ying Pun"

String1

"Sheung Wan"

"HKU"

"Wan Chai"

String1

"Causeway Bay"

"Admiralty"

"Diamond Hill"

String1

"Kai Tak"

"Hin Keng"

"Wong Tai Sin"

"Choi Hung"

"Ocean Park"

String1

"Wong Chuk Hang"

"Admiralty"

"Wong Chuk Hang"

String1

"Ocean Park"

"Lei Tung"

"AsiaWorld–Expo"

String1

"Airport"

"North Point"

String1

"Fortress Hill"

"Quarry Bay"

"Sai Wan Ho"

String1

"Tai Koo"

"Shau Kei Wan"

"Kowloon Bay"

String1

"Ngau Tau Kok"

"Choi Hung"

"Nam Cheong"

String1

"Olympic"

"Lai King"

"Austin"

"Mei Foo"

"Sheung Shui"

String1

"Lo Wu"

"Lok Ma Chau"

"Fanling"

"Tsim Sha Tsui"

String1

"East Tsim Sha Tsui"

"Jordan"

"Admiralty"

"Quarry Bay"

String1

"North Point"

"Yau Tong"

"Tai Koo"

"Yau Tong"

String1

"Tiu Keng Leng"

"Quarry Bay"

"Lam Tin"

"Lai Chi Kok"

String1

"Mei Foo"

"Cheung Sha Wan"

more2Dict

"Mei Foo"

"Kwai Fong"

"Kwun Tong"

"Ngau Tau Kok"

"Whampoa"

"Long Ping"

"Tung Chung"

"Austin"

"Chai Wan"

"Tai Wai"

"Heng On"

"Tuen Mun"

"Central"

"Kwai Hing"

"Sai Ying Pun"

"Shek Kip Mei"

"Wan Chai"

"Diamond Hill"

"Ocean Park"

"Wong Chuk Hang"

"AsiaWorld–Expo"

"North Point"

"Sai Wan Ho"

"Kowloon Bay"

"Nam Cheong"

"Sheung Shui"

"Quarry Bay"

"Tsim Sha Tsui"

"Yau Tong"

"Lai Chi Kok"

xxxxxxxxxx
 
mtr_edges, mtr_index = mtr_graph(2020) # MTR system as of 2020

739 ms

xxxxxxxxxx
 
length(mtr_index) # number of stations

2.2 ms

String1

"Yau Tong"

"Tseung Kwan O"

xxxxxxxxxx
 
mtr_edges["Tiu Keng Leng"] # stations adjacent to Tiu Keng Leng, for example

5.3 ms

xxxxxxxxxx
 
plot_graph_random(mtr_edges, mtr_index)

316 ms

xxxxxxxxxx
 
get_adjacency_plots(mtr_edges, mtr_index)

669 ms

xxxxxxxxxx
 
get_laplacian_plots(mtr_edges, mtr_index)

505 ms

The bottom left graph vaguely resembles the actual MTR map.

12.4 μs

plot_mtr_graph (generic function with 1 method)

xxxxxxxxxx
 
function plot_mtr_graph(year)
    edges, index = mtr_graph(year)
    plot_graph_laplacian_low(edges, index)
end

72.0 μs

Dict

"Lo Wu"

"Kwun Tong"

"Ngau Tau Kok"

"Hung Hom"

"Kowloon Tong"

"Fanling"

"Shek Kip Mei"

"University"

"Wong Tai Sin"

"Mong Kok East"

"Diamond Hill"

"Lok Fu"

"Sha Tin"

"Kowloon Bay"

"Choi Hung"

"Racecourse"

"Sheung Shui"

xxxxxxxxxx
 
mtr_graph(1979)[2] # stations open in 1979

100 μs

xxxxxxxxxx
 
plot_mtr_graph(1979)

29.1 ms

xxxxxxxxxx
 
plot_mtr_graph(1981)

40.1 ms

xxxxxxxxxx
 
plot_mtr_graph(1985)

80.3 ms

xxxxxxxxxx
 
plot_mtr_graph(1990)

114 ms

xxxxxxxxxx
 
plot_mtr_graph(1998) # we can see some major extension to the MTR system in 1998

110 ms

xxxxxxxxxx
 
plot_mtr_graph(2002)

137 ms

xxxxxxxxxx
 
plot_mtr_graph(2005)

203 ms

xxxxxxxxxx
 
plot_mtr_graph(2007)

189 ms

xxxxxxxxxx
 
plot_mtr_graph(2009)

203 ms

xxxxxxxxxx
 
plot_mtr_graph(2017)

196 ms

xxxxxxxxxx
 
plot_mtr_graph(2020)

219 ms

Another application:

Take the eigenvector of the adjacency matrix $A$ with largest eigenvalue.
This vector can be rescaled to have all nonpositive entries.
These entries determine an order on the vertices of your graph.
This ordering (approximately) will be put important vertices first, unimportant ones last.

6.2 μs

rank_vertices (generic function with 1 method)

xxxxxxxxxx
 
function rank_vertices(edges, index)
    A = graph_adjacency_matrix(edges, index)
    vec = eigen(A).vectors[:, end]
    # rescale to have all nonpositive entries
    if maximum(vec) > 1e-12
        vec = -vec
    end
    @assert maximum(vec) <= 1e-12
    rind = reverse_index(index)
    ranks = [rind[i] for i = sortperm(vec)]
    scores = Dict(key => vec[i] / minimum(vec) for (key, i) in index)
    return ranks, scores
end

101 μs

Applying this strategy gives the following ranking of MTR stations, from most important to least important.

Does Lai King make sense as #1?

Certainly Chai Wan, Po Lam, and We Kai Sha are plausible as the least important stations, since each is located at the end of its respective line.

5.6 μs

1String1

"Lai King"

"Nam Cheong"

"Mei Foo"

"Tsing Yi"

"Olympic"

"Kowloon"

"Kwai Fong"

"Austin"

"Lai Chi Kok"

"Tsuen Wan West"

"Sunny Bay"

"Airport"

"East Tsim Sha Tsui"

"Hong Kong"

"Kwai Hing"

"Cheung Sha Wan"

"Kam Sheung Road"

"Tsim Sha Tsui"

"Central"

"Admiralty"

more86

"Lo Wu"

"Lok Ma Chau"

"Shau Kei Wan"

"Ma On Shan"

"Hang Hau"

"LOHAS Park"

"Heng Fa Chuen"

"Wu Kai Sha"

"Po Lam"

"Chai Wan"

2Dict

"Mei Foo"

0.903978

"Kwai Fong"

0.392665

"Kwun Tong"

0.000229309

"Ngau Tau Kok"

0.000494898

"Whampoa"

0.00392845

"Long Ping"

0.0215197

"Tung Chung"

0.104997

"Austin"

0.391578

"Chai Wan"

4.53137e-6

"Tai Wai"

0.0213396

"Heng On"

7.81581e-5

"Tuen Mun"

0.00110401

"Central"

0.126188

"Kwai Hing"

0.15378

"Sai Ying Pun"

0.0194052

"Shek Kip Mei"

0.0239

"Wan Chai"

0.0464861

"Diamond Hill"

0.00789618

"Ocean Park"

0.0464627

"Wong Chuk Hang"

0.0181963

"AsiaWorld–Expo"

0.0912432

"North Point"

0.0011613

"Sai Wan Ho"

8.83265e-5

"Kowloon Bay"

0.00122487

"Nam Cheong"

0.945156

"Sheung Shui"

0.00010659

"Quarry Bay"

0.000572624

"Tsim Sha Tsui"

0.128545

"Yau Tong"

0.000296319

"Lai Chi Kok"

0.355898

xxxxxxxxxx
 
ranks, scores = rank_vertices(mtr_edges, mtr_index)

796 ms

Here is a method to plot the relative 'eigenvector centrality' of various MTR stations as they evolve over time.

3.0 μs

plot_ranking (generic function with 1 method)

xxxxxxxxxx
 
function plot_ranking(stations)
    ans = plot(
        title="Eigenvector centrality of MTR stations",
        legend = :outertopleft,
        xlabel = "Year"
    )
    for station = stations
        series = []
        years = []
        for year=1910:2020
            edges, index = mtr_graph(year)
            if haskey(index, station)
                rank, score = rank_vertices(edges, index)
                rind = reverse_index(Dict(i => rank[i] for i=1:length(rank)))
                append!(series, 1.0 - (rind[station] - 1) / (length(index) - 1))
                append!(years, year)
            end
        end
        plot!(years, series, label=station, ylim=(0.0, 1.0))
    end
    return ans
end

80.6 μs

xxxxxxxxxx
 
plot_ranking(["Choi Hung"])

1.4 s

xxxxxxxxxx
 
plot_ranking(["Kowloon Tong", "Lai King", "Central", "North Point", "Diamond Hill"])

230 ms

Some observations:

Central continues to become more central to the network.
In the early history of the MTR, Kowloon Tong was the most important station.
Lai King jumps ahead in centrality after the Airport Express line opens (on 6 July 1998).
Increased centrality of Diamond Hill station in 2020 probably due to new Kai Tak station.

2.9 μs

One more application. An $n$ -step path in a graph is a sequence of $n$ edges

$v_{i_{0}} ⟷ v_{i_{1}} ⟷ v_{i_{2}} ⟷ \dots ⟷ v_{i_{n}}$

such that consecutive edges share an endpoint.

The path starts at vertex $v_{i_{0}}$ and edges at vertex $v_{i_{n}}$ .

Taking powers of the adjacency matrix lets us count all $n$ -step paths between two vertices.

The number of 1-step paths between vertices $v_{i}$ and $v_{j}$ is $1$ is these two vertices are connected by an edge and $0$ otherwise. This is just the entry in position $(i, j)$ of the adjacency matrix $A$ .

Similarly, the number of $n$ -step paths between vertices $v_{i}$ and $v_{j}$ is the entry in position $(i, j)$ of $A^{n}$ .

9.2 μs

xxxxxxxxxx
 
plot_graph_laplacian_low(test_edges, test_index)

13.2 ms

Dict

Int641

xxxxxxxxxx
 
test_edges

1.5 μs

6×6 Array{Int64,2}:
 0  1  0  0  1  0
 1  0  1  0  1  0
 0  1  0  1  0  0
 0  0  1  0  1  1
 1  1  0  1  0  0
 0  0  0  1  0  0

6×6 Array{Int64,2}:
 2  1  1  1  1  0
 1  3  0  2  1  0
 1  0  2  0  2  1
 1  2  0  3  0  0
 1  1  2  0  3  1
 0  0  1  0  1  1

6×6 Array{Int64,2}:
 2  4  2  2  4  1
 4  2  5  1  6  2
 2  5  0  5  1  0
 2  1  5  0  6  3
 4  6  1  6  2  0
 1  2  0  3  0  0

6×6 Array{Int64,2}:
 8   8   6   7   8  2
 8  15   3  13   7  1
 6   3  10   1  12  5
 7  13   1  14   3  0
 8   7  12   3  16  6
 2   1   5   0   6  3

xxxxxxxxxx
 
A, A^2, A^3, A^4

1.0 s

xxxxxxxxxx
 
# the number of 3-step paths from vertex 6 to vertex 4 in the test graph
(A^3)[4, 6]

4.6 ms

xxxxxxxxxx
 
# the number of 4-step paths from vertex 6 to vertex 5 in the test graph
(A^4)[5, 6]

51.0 μs

Consider the following modified adjacency matrix $M$ : take the usual adjacency matrix $A$ and divide all entries in column $i$ by the degree of vertex $i$ .

2.6 μs

degree_normalized_adjacency_matrix (generic function with 1 method)

xxxxxxxxxx
 
function degree_normalized_adjacency_matrix(edges, index)
    A = graph_adjacency_matrix(edges, index)
    D = Diagonal(A * ones(Int64, length(index)))
    return A * D^-1
end

30.0 μs

The entries in each column of $M$ are nonnegative and sum to $1.0$ . The same is true of each column of any power $M^{n}$ of this matrix. In other words, the entries in column $i$ of $M^{n}$ give a probability distribution on the vertices of your graph.

These probabilities give your chances of being at a givex vertex if you carry out the following random walk on your graph:

Start at vertex $v_{i}$ .
Choose an adjacent vertex (one connected by an edge) at random and move to that vertex.
Repeat $n$ times.

We can plot this distribution.

4.5 μs

path_distribution (generic function with 1 method)

xxxxxxxxxx
 
function path_distribution(edges, index, i, n)
    DNA = degree_normalized_adjacency_matrix(edges, index)
    ans = (DNA^n)[:, index[i]]
    rind = reverse_index(index)
    return ans, [rind[i] => ans[i] for i = reverse(sortperm(ans))]
end

53.9 μs

As $n$ becomes very large, the distribution of our random walk converges to some limiting set of probabilities that tell you how likely you are to be at a given vertex if you travel on the graph for a large number of steps, at each step choosing a random adjacent vertex to your current position to travel to.

Key fact: this limiting distribution is an eigenvector for $M$ with eigenvalue $1$ .

If the $1$ -eigenspace of $M$ is $1$ -dimensional, then there is a unique limiting distribution, given by the unique eigenvector with eigenvalue $1$ whose entries sum to $1$ .

It can happen that this eigenspace has dimension greater than one. In this case more than one limiting distribution may exist for our random walk, and which one we end up in depends on the vertex the walk starts at.

11.7 μs

limiting_distribution (generic function with 1 method)

xxxxxxxxxx
 
function limiting_distribution(edges, index, stepsize)
    DNA = degree_normalized_adjacency_matrix(edges, index)
    eig = eigen(DNA^stepsize)
    limit = eig.vectors[:, length(eig.values)]
    limit = abs.(limit / sum(limit))
    
    is_unique = length([i for i=eig.values if abs(1.0 - i) < 5e-15]) <= 1
    
    return limit, is_unique
end

55.0 μs

distribution_animation (generic function with 1 method)

xxxxxxxxxx
 
function distribution_animation(edges_index, start, steps, stepsize, title)
    edges, index = edges_index
    limit, is_unique = limiting_distribution(edges, index, stepsize)
    anim = @animate for i=1:stepsize:steps
        dist, _ = path_distribution(edges, index, start, i)
        
        plot(dist, 
            label="Distribution after $(i) steps",
            title="Convergence to limit distribution for walk on " * title,         
            color=:blue, 
            ylim = (0.0, maximum(limit) + 0.05)
        )
        
        if is_unique
            plot!(limit, color=:orange, label="Limiting distribution")
        end
    end every 1
    gif(anim, "anim_fps15.gif", fps = 5)
end

70.4 μs

xxxxxxxxxx
 
distribution_animation((test_edges, test_index), 1, 30, 2, "test graph")

4.9 s

xxxxxxxxxx
 
plot_graph_laplacian_low(path_graph(20)[1], path_graph(20)[2])

30.4 ms

On the path graph, there are multiple limiting distributions:

15.0 μs

xxxxxxxxxx
 
distribution_animation(path_graph(20), 10, 200, 4, "path graph")

2.6 s

xxxxxxxxxx
 
distribution_animation(path_graph(20), 1, 800, 8, "path graph")

4.1 s

xxxxxxxxxx
 
plot_graph_laplacian_low(circle_graph(10)[1], circle_graph(10)[2])

22.4 ms

xxxxxxxxxx
 
distribution_animation(circle_graph(10), 5, 50, 2, "circle graph")

1.1 s

xxxxxxxxxx
 
distribution_animation(circle_graph(10), 4, 50, 2, "circle graph")

1.1 s

For circle graphs with an even number of vertices, there are also multiple limiting distributions, as illustrated in the plots above.

However, for circle graphs with an odd number of vertices, there is a unique limiting distribution for our random walk, namely, the uniform distribution:

7.3 μs

xxxxxxxxxx
 
distribution_animation(circle_graph(9), 5, 120, 4, "circle graph")

1.2 s

xxxxxxxxxx
 
plot_graph_laplacian_low(mtr_edges, mtr_index)

165 ms

There is also a unique limiting distribution for the random walk on the MTR graph:

7.5 μs

xxxxxxxxxx
 
distribution_animation(mtr_graph(2020), "Chai Wan", 2000, 40, "MTR graph")

2.3 s

This distribution is actually very simple: the limiting probabilities are proportional to the degrees of the vertices in the graph! We check this below:

7.9 μs

graph_degree (generic function with 1 method)

xxxxxxxxxx
 
function graph_degree(edges, index, i)
    return sum(graph_adjacency_matrix(edges, index)[:, index[i]])
end

47.9 μs

plot_degree_distribution (generic function with 1 method)

xxxxxxxxxx
 
function plot_degree_distribution(edges, index, title)
    degrees = zeros(length(index), 1)
    for (s, i)=index
        degrees[i] = graph_degree(edges, index, s)
    end
    distribution = degrees / sum(degrees)
    plot(title="Vertex degrees for " * title * " divided by number of edges",
         distribution, ylim=(0.0, maximum(distribution) + 0.05), legend=false)
end

59.6 μs

xxxxxxxxxx
 
plot_degree_distribution(mtr_edges, mtr_index, "MTR graph")

534 ms

This a general phenomenon. Compare the following with the limiting distribution plotted above:

24.9 μs

xxxxxxxxxx
 
plot_degree_distribution(test_edges, test_index, "test graph")

39.2 ms

One more interesting phenomenon: even if there is a unique limiting distribution, how fast our random walk converges to this distribution can still vary signficantly depending on which vertex the walk starts at.

10.6 μs

plot_convergence (generic function with 1 method)

xxxxxxxxxx
 
function plot_convergence(edges, index, starts, steps, stepsize)
    limit, is_unique = limiting_distribution(edges, index, stepsize)
    @assert is_unique
    
    p = plot(
        title="Log-mean-square distance to limiting distribution", 
        xlabel="Steps"
    )
    
    for start=starts
        convergence = []
        for i=1:stepsize:steps
            dist, _ = path_distribution(edges, index, start, i)
            append!(convergence, norm(dist - limit))
        end
        plot!(log.(convergence), label="Random walk starts at " * string(start))
    end
    return p
end

63.3 μs

xxxxxxxxxx
 
plot_convergence(mtr_edges, mtr_index, 
    ["Lai King", "Central", "North Point", "Sha Tin"], 800, 2)

2.7 s

xxxxxxxxxx
 
plot_convergence(test_edges, test_index, [1, 2, 3, 4, 5, 6], 40, 2)

56.7 ms