Notebook 14 – Math 2121, Fall 2020
In this notebook we will explore an application of eigenvalues and eigenvectors to drawing graphs (in the sense of discrete mathematics, rather than calculus) and to ranking the importance of nodes in a network (specifically, the MTR system in Hong Kong).
xxxxxxxxxx
using LinearAlgebra
xxxxxxxxxx
using Plots
A (simple, undirected) graph is a set of vertices
plot_graph (generic function with 1 method)
xxxxxxxxxx
function plot_graph(edges, index, xy)
# helper method for plotting graphs, which uses the columns of xy
# (which should be a 2-by-n matrix where n = number of vertices in graph)
# as the vertex locations
p = plot(grid=nothing, axis=nothing, foreground_color_subplot="white",
aspect_ratio=:equal)
for (v, adjacent) in edges
if haskey(index, v)
i = index[v]
x, y = xy[1, i], xy[2, i]
for w in adjacent
if haskey(index, w)
j = index[w]
dx, dy = xy[1, j] - x, xy[2, j] - y
quiver!(quiver = ([dx],[dy]), [x], [y], color=:blue)
end
end
end
end
scatter!(xy[1,:], xy[2,:], markersize = 5, legend=false)
return p
end
plot_graph_random (generic function with 1 method)
xxxxxxxxxx
function plot_graph_random(edges, index)
# this method just passes a random 2-row vector xy to the plot_graph function
xy = rand(2, length(index))
plot_graph(edges, index, xy)
end
graph_index (generic function with 1 method)
xxxxxxxxxx
function graph_index(edges)
vertices = [v for (v, k)=edges]
degrees = [(-length(k), v) for (v, k)=edges]
perm = sortperm(degrees)
return Dict([degrees[perm[i]][2] => i for i=1:length(perm)])
end
For example, here is a graph with 6 vertices and 7 edges:
xxxxxxxxxx
begin
example_edges = Dict(
1 => [2, 5],
2 => [1, 3, 5],
3 => [2, 4],
4 => [3, 5, 6],
5 => [1, 2, 4],
6 => [4])
example_index = Dict([i => i for i=1:6])
plot_graph_random(example_edges, example_index)
end
The adjacency matrix of a graph with vertices
The degree of a vertex
This is the sum of the entries in column
graph_adjacency_matrix (generic function with 1 method)
xxxxxxxxxx
function graph_adjacency_matrix(edges, index)
n = length(index)
A = zeros(Int64, n, n)
for (v, adjacent) in edges
if haskey(index, v)
for w in adjacent
if haskey(index, w)
i = index[v]
j = index[w]
A[i, j] = 1
end
end
end
end
return A
end
6×6 Array{Int64,2}:
0 1 0 0 1 0
1 0 1 0 1 0
0 1 0 1 0 0
0 0 1 0 1 1
1 1 0 1 0 0
0 0 0 1 0 0
xxxxxxxxxx
A = graph_adjacency_matrix(example_edges, example_index)
4
3
5
6
2
1
3
5
3
2
4
5
1
2
4
6
4
1
2
5
xxxxxxxxxx
example_edges
The Laplacian matrix of a graph is the matrix
graph_laplacian_matrix (generic function with 1 method)
xxxxxxxxxx
function graph_laplacian_matrix(edges, index)
A = graph_adjacency_matrix(edges, index)
D = Diagonal(A * ones(Int64, length(index)))
return D - A
end
6×6 Array{Int64,2}:
2 -1 0 0 -1 0
-1 3 -1 0 -1 0
0 -1 2 -1 0 0
0 0 -1 3 -1 -1
-1 -1 0 -1 3 0
0 0 0 -1 0 1
xxxxxxxxxx
L = graph_laplacian_matrix(example_edges, example_index)
Some interesting facts:
The eigenvalues of the adjacency matrix
are always all real.The eigenvalues of the Laplacian matrix
are all real and nonnegative.The matrix
always has as an eigenvalue.The multiplicity of the
-eigenvalue of is the number of connected components of the graph.
-2.13639
-1.20608
-0.540632
0.261144
1.08247
2.53948
xxxxxxxxxx
eigen(A).values
0.0
0.721586
1.68257
3.0
3.70462
4.89122
xxxxxxxxxx
# the first eigenvalue is zero, but looks negative due to floating point limitations
eigen(L).values
random_graph (generic function with 1 method)
xxxxxxxxxx
function random_graph(n, p)
# creates a random graph with n vertices
edges = Dict(i => [] for i=1:n)
for i=1:n
for j=i+1:n
if rand() < p
append!(edges[i], j)
append!(edges[j], i)
end
end
end
return edges, graph_index(edges)
end
connected_components (generic function with 1 method)
xxxxxxxxxx
function connected_components(edges, index)
# computes connected components using graph Laplacian matrix
L = graph_laplacian_matrix(edges, index)
ans = 0
for v=eigen(L).values
if abs(v) < 1e-14
ans += 1
end
end
return ans
end
xxxxxxxxxx
begin
redges, rindex = random_graph(40, 0.07)
plot_graph_random(redges, rindex)
end
3
xxxxxxxxxx
connected_components(redges, rindex)
Here is an interesting algorithm for drawing a complicated graph:
Compute the adjacency matrix
or Laplacian matrix .Take two distinct eigenvectors
and for this matrix.Rescale these eigenvectors to have the same length.
Use the coordinates
, , , for the vertices.Often what works well is to use the two eigenvectors for
with smallest nonzero eigenvalue.Another choice is the use the two eigenvectors for
with the largest eigenvalues.
path_graph (generic function with 1 method)
xxxxxxxxxx
function path_graph(n)
# creates path graph with n vertices
index = Dict(i => i for i=1:n)
edges = Dict(i => (i == n ? [n - 1] : i == 1 ? [2] : [i - 1, i + 1]) for i=1:n)
return edges, index
end
circle_graph (generic function with 1 method)
xxxxxxxxxx
function circle_graph(n)
# creates circle graph with n vertices
index = Dict(i => i for i=1:n)
edges = Dict(i => [i == n ? 1 : i + 1, i == 1 ? n : i - 1] for i=1:n)
return edges, index
end
complete_graph (generic function with 1 method)
xxxxxxxxxx
function complete_graph(n)
# creates complete graph with n vertices
index = Dict(i => i for i=1:n)
edges = Dict(i => [j for j=1:n if i != j] for i=1:n)
return edges, index
end
connected_random_graph (generic function with 2 methods)
xxxxxxxxxx
function connected_random_graph(n, p, trials=1000)
# attempts to create a random graph with one connected component
for iter=1:trials
e, i = random_graph(n, p)
if connected_components(e, i) == 1
return e, i
end
end
return random_graph(n, p)
end
The following method creates the graph representing the station network for Hong Kong's MTR system, as it existed for a given input year.
mtr_graph (generic function with 2 methods)
xxxxxxxxxx
begin
function reverse_index(index)
return Dict(v => k for (k, v) in index)
end
function get_year(s)
baseline = -2208988800 # this the UTC time for 1 Jan 1900 in seconds
year = 365 * 24 * 60 * 60 # seconds in a year
return 1900 + (s - baseline) / year
end
function subgraph(index, vertices)
ans = Dict()
rindex = reverse_index(index)
c = 1
for i=1:length(rindex)
v = rindex[i]
if v in vertices
ans[v] = c
c += 1
end
end
return ans
end
function restrict_before_year(edges, dates, year)
return Dict([
v => [u for u=e if get_year(dates[u]) <= year ]
for (v, e) = edges if get_year(dates[v]) <= year
])
end
function mtr_graph(year=2020)
edges = Dict(["East Tsim Sha Tsui" => ["Austin", "Hung Hom", "Tsim Sha Tsui"], "Tsim Sha Tsui" => ["East Tsim Sha Tsui", "Jordan", "Admiralty"], "Central" => ["Hong Kong", "Sheung Wan", "Admiralty"], "Hong Kong" => ["Central", "Kowloon"], "Lo Wu" => ["Sheung Shui"], "Lok Ma Chau" => ["Sheung Shui"], "Sheung Shui" => ["Lo Wu", "Lok Ma Chau", "Fanling"], "Fanling" => ["Tai Wo", "Sheung Shui"], "Tai Wo" => ["Tai Po Market", "Fanling"], "Tai Po Market" => ["University", "Tai Wo"], "University" => ["Tai Po Market", "Racecourse", "Fo Tan"], "Racecourse" => ["University", "Sha Tin"], "Fo Tan" => ["University", "Sha Tin"], "Sha Tin" => ["Fo Tan", "Tai Wai", "Racecourse"], "Tai Wai" => ["Kowloon Tong", "Sha Tin", "Hin Keng", "Che Kung Temple"], "Kowloon Tong" => ["Mong Kok East", "Lok Fu", "Shek Kip Mei", "Tai Wai"], "Mong Kok East" => ["Kowloon Tong", "Hung Hom"], "Hung Hom" => ["East Tsim Sha Tsui", "Mong Kok East"], "Whampoa" => ["Ho Man Tin"], "Ho Man Tin" => ["Whampoa", "Yau Ma Tei"], "Yau Ma Tei" => ["Ho Man Tin", "Jordan", "Mong Kok"], "Mong Kok" => ["Prince Edward", "Yau Ma Tei"], "Prince Edward" => ["Sham Shui Po", "Shek Kip Mei", "Mong Kok"], "Shek Kip Mei" => ["Kowloon Tong", "Prince Edward"], "Lok Fu" => ["Kowloon Tong", "Wong Tai Sin"], "Wong Tai Sin" => ["Lok Fu", "Diamond Hill"], "Diamond Hill" => ["Kai Tak", "Hin Keng", "Wong Tai Sin", "Choi Hung"], "Choi Hung" => ["Kowloon Bay", "Diamond Hill"], "Kowloon Bay" => ["Ngau Tau Kok", "Choi Hung"], "Ngau Tau Kok" => ["Kowloon Bay", "Kwun Tong"], "Kwun Tong" => ["Ngau Tau Kok", "Lam Tin"], "Lam Tin" => ["Kwun Tong", "Yau Tong"], "Yau Tong" => ["Tiu Keng Leng", "Quarry Bay", "Lam Tin"], "Tiu Keng Leng" => ["Yau Tong", "Tseung Kwan O"], "Tsuen Wan" => ["Tai Wo Hau"], "Tai Wo Hau" => ["Kwai Hing", "Tsuen Wan"], "Kwai Hing" => ["Kwai Fong", "Tai Wo Hau"], "Kwai Fong" => ["Kwai Hing", "Lai King"], "Lai King" => ["Mei Foo", "Nam Cheong", "Kwai Fong", "Tsing Yi"], "Mei Foo" => ["Tsuen Wan West", "Lai Chi Kok", "Lai King", "Nam Cheong"], "Lai Chi Kok" => ["Mei Foo", "Cheung Sha Wan"], "Cheung Sha Wan" => ["Lai Chi Kok", "Sham Shui Po"], "Sham Shui Po" => ["Prince Edward", "Cheung Sha Wan"], "Jordan" => ["Tsim Sha Tsui", "Yau Ma Tei"], "Admiralty" => ["Central", "Ocean Park", "Wan Chai", "Tsim Sha Tsui"], "Kennedy Town" => ["HKU"], "HKU" => ["Kennedy Town", "Sai Ying Pun"], "Sai Ying Pun" => ["Sheung Wan", "HKU"], "Sheung Wan" => ["Central", "Sai Ying Pun"], "Wan Chai" => ["Causeway Bay", "Admiralty"], "Causeway Bay" => ["Tin Hau", "Wan Chai"], "Tin Hau" => ["Causeway Bay", "Fortress Hill"], "Fortress Hill" => ["Tin Hau", "North Point"], "North Point" => ["Fortress Hill", "Quarry Bay"], "Quarry Bay" => ["North Point", "Yau Tong", "Tai Koo"], "Tai Koo" => ["Quarry Bay", "Sai Wan Ho"], "Sai Wan Ho" => ["Tai Koo", "Shau Kei Wan"], "Shau Kei Wan" => ["Heng Fa Chuen", "Sai Wan Ho"], "Heng Fa Chuen" => ["Shau Kei Wan", "Chai Wan"], "Chai Wan" => ["Heng Fa Chuen"], "Tung Chung" => ["Sunny Bay"], "Sunny Bay" => ["Tung Chung", "Disneyland Resort", "Tsing Yi"], "Tsing Yi" => ["Airport", "Lai King", "Sunny Bay", "Kowloon"], "Nam Cheong" => ["Olympic", "Lai King", "Austin", "Mei Foo"], "Olympic" => ["Nam Cheong", "Kowloon"], "Kowloon" => ["Olympic", "Hong Kong", "Tsing Yi"], "AsiaWorld–Expo" => ["Airport"], "Airport" => ["AsiaWorld–Expo", "Tsing Yi"], "Po Lam" => ["Hang Hau"], "Hang Hau" => ["Po Lam", "Tseung Kwan O"], "LOHAS Park" => ["Tseung Kwan O"], "Tseung Kwan O" => ["Tiu Keng Leng", "LOHAS Park", "Hang Hau"], "Tuen Mun" => ["Siu Hong"], "Siu Hong" => ["Tin Shui Wai", "Tuen Mun"], "Tin Shui Wai" => ["Long Ping", "Siu Hong"], "Long Ping" => ["Tin Shui Wai", "Yuen Long"], "Yuen Long" => ["Kam Sheung Road", "Long Ping"], "Kam Sheung Road" => ["Tsuen Wan West", "Yuen Long"], "Tsuen Wan West" => ["Kam Sheung Road", "Mei Foo"], "Austin" => ["East Tsim Sha Tsui", "Nam Cheong"], "Wu Kai Sha" => ["Ma On Shan"], "Ma On Shan" => ["Heng On", "Wu Kai Sha"], "Heng On" => ["Tai Shui Hang", "Ma On Shan"], "Tai Shui Hang" => ["Heng On", "Shek Mun"], "Shek Mun" => ["Tai Shui Hang", "City One"], "City One" => ["Shek Mun", "Sha Tin Wai"], "Sha Tin Wai" => ["Che Kung Temple", "City One"], "Che Kung Temple" => ["Tai Wai", "Sha Tin Wai"], "Hin Keng" => ["Diamond Hill", "Tai Wai"], "Kai Tak" => ["Diamond Hill"], "Disneyland Resort" => ["Sunny Bay"], "South Horizons" => ["Lei Tung"], "Lei Tung" => ["Wong Chuk Hang", "South Horizons"], "Wong Chuk Hang" => ["Ocean Park", "Lei Tung"], "Ocean Park" => ["Wong Chuk Hang", "Admiralty"]])
dates = Dict(["Lo Wu" => -637977600, "Lok Ma Chau" => 1187136000, "Sheung Shui" => -1250640000, "Fanling" => -1869868800, "Tai Wo" => 610675200, "Tai Po Market" => 418521600, "University" => -418780800, "Racecourse" => 276566400, "Fo Tan" => 477273600, "Sha Tin" => -1869868800, "Tai Wai" => 429753600, "Kowloon Tong" => 307584000, "Mong Kok East" => -1869868800, "Hung Hom" => 186537600, "Whampoa" => 1477180800, "Ho Man Tin" => 1477180800, "Yau Ma Tei" => 315446400, "Mong Kok" => 315446400, "Prince Edward" => 389836800, "Shek Kip Mei" => 307584000, "Lok Fu" => 307584000, "Wong Tai Sin" => 307584000, "Diamond Hill" => 307584000, "Choi Hung" => 307584000, "Kowloon Bay" => 307584000, "Ngau Tau Kok" => 307584000, "Kwun Tong" => 307584000, "Lam Tin" => 623203200, "Yau Tong" => 1028419200, "Tiu Keng Leng" => 1029628800, "Tsuen Wan" => 389836800, "Tai Wo Hau" => 389836800, "Kwai Hing" => 389836800, "Kwai Fong" => 389836800, "Lai King" => 389836800, "Mei Foo" => 390441600, "Lai Chi Kok" => 390441600, "Cheung Sha Wan" => 390441600, "Sham Shui Po" => 390441600, "Jordan" => 315446400, "Tsim Sha Tsui" => 315446400, "Admiralty" => 319161600, "Central" => 319161600, "Kennedy Town" => 1419724800, "HKU" => 1419724800, "Sai Ying Pun" => 1427587200, "Sheung Wan" => 517190400, "Wan Chai" => 486345600, "Causeway Bay" => 486345600, "Tin Hau" => 486345600, "Fortress Hill" => 486345600, "North Point" => 486345600, "Quarry Bay" => 486345600, "Tai Koo" => 486345600, "Sai Wan Ho" => 486345600, "Shau Kei Wan" => 486345600, "Heng Fa Chuen" => 486345600, "Chai Wan" => 486345600, "Tung Chung" => 898387200, "Sunny Bay" => 1117584000, "Tsing Yi" => 898387200, "Nam Cheong" => 1071878400, "Olympic" => 898387200, "Kowloon" => 898387200, "Hong Kong" => 898387200, "AsiaWorld–Expo" => 1135036800, "Airport" => 899683200, "Po Lam" => 1029628800, "Hang Hau" => 1029628800, "LOHAS Park" => 1248566400, "Tseung Kwan O" => 1029628800, "Tuen Mun" => 1071878400, "Siu Hong" => 1071878400, "Tin Shui Wai" => 1071878400, "Long Ping" => 1071878400, "Yuen Long" => 1071878400, "Kam Sheung Road" => 1071878400, "Tsuen Wan West" => 1071878400, "Austin" => 1250380800, "East Tsim Sha Tsui" => 1098576000, "Wu Kai Sha" => 1103587200, "Ma On Shan" => 1103587200, "Heng On" => 1103587200, "Tai Shui Hang" => 1103587200, "Shek Mun" => 1103587200, "City One" => 1103587200, "Sha Tin Wai" => 1103587200, "Che Kung Temple" => 1103587200, "Hin Keng" => 1581638400, "Kai Tak" => 1581638400, "Disneyland Resort" => 1122854400, "South Horizons" => 1482883200, "Lei Tung" => 1482883200, "Wong Chuk Hang" => 1482883200, "Ocean Park" => 1482883200])
edges = restrict_before_year(edges, dates, year + 1)
index = graph_index(edges)
return edges, index
end
end
plot_graph_adjacency_low (generic function with 1 method)
xxxxxxxxxx
function plot_graph_adjacency_low(edges, index)
# plots graph using lowest eigenvectors of adjacency matrix as xy-coordinates
n = length(index)
A = graph_adjacency_matrix(edges, index)
xy = transpose(eigen(A).vectors[:,1:2])
plot_graph(edges, index, xy)
end
plot_graph_adjacency_high (generic function with 1 method)
xxxxxxxxxx
function plot_graph_adjacency_high(edges, index)
# plots graph using highest eigenvectors of adjacency matrix as xy-coordinates
n = length(index)
A = graph_adjacency_matrix(edges, index)
xy = transpose(eigen(A).vectors[:,end - 1:end])
plot_graph(edges, index, xy)
end
plot_graph_laplacian_low (generic function with 1 method)
xxxxxxxxxx
function plot_graph_laplacian_low(edges, index)
# plots graph using lowest nonzero eigenvectors of Laplacian as xy-coordinates
n = length(index)
A = graph_laplacian_matrix(edges, index)
c = 2
eig = eigen(A).values
while c + 1 < n && abs(eig[c]) < 10e-14
c += 1
end
xy = transpose(eigen(A).vectors[:, c:c+1])
plot_graph(edges, index, xy)
end
plot_graph_laplacian_high (generic function with 1 method)
xxxxxxxxxx
function plot_graph_laplacian_high(edges, index)
# plots graph using highest eigenvectors of Laplacian as xy-coordinates
n = length(index)
A = graph_laplacian_matrix(edges, index)
xy = transpose(eigen(A).vectors[:, end - 1:end])
plot_graph(edges, index, xy)
end
get_adjacency_plots (generic function with 1 method)
xxxxxxxxxx
function get_adjacency_plots(e, i)
plot(plot_graph_adjacency_low(e, i),
plot_graph_adjacency_high(e, i))
end
get_laplacian_plots (generic function with 1 method)
xxxxxxxxxx
function get_laplacian_plots(e, i)
plot(plot_graph_laplacian_low(e, i),
plot_graph_laplacian_high(e, i))
end
Here is our starting graph from the example above, plotted randomly:
xxxxxxxxxx
begin
test_edges = example_edges
test_index = example_index
plot_graph_random(test_edges, test_index)
end
Here is the same graph, plotted using eigenvector methods based on the adjacency matrix:
xxxxxxxxxx
get_adjacency_plots(test_edges, test_index)
Here is the same graph, plotted using eigenvector methods based on the Laplacian matrix:
xxxxxxxxxx
get_laplacian_plots(test_edges, test_index)
In my opinion the plot using the eigenvectors of the Laplacian for the lowest nonzero eigenvalues as xy-coordinates gives the 'best' output.
Here are a few more examples to underscore this claim:
xxxxxxxxxx
begin
path_edges, path_index = path_graph(40)
plot_graph_random(path_edges, path_index)
end
xxxxxxxxxx
get_adjacency_plots(path_edges, path_index)
xxxxxxxxxx
get_laplacian_plots(path_edges, path_index)
xxxxxxxxxx
begin
circle_edges, circle_index = circle_graph(30)
plot_graph_random(circle_edges, circle_index)
end
xxxxxxxxxx
get_adjacency_plots(circle_edges, circle_index)
xxxxxxxxxx
get_laplacian_plots(circle_edges, circle_index)
xxxxxxxxxx
begin
complete_edges, complete_index = complete_graph(9)
plot_graph_random(complete_edges, complete_index)
end
xxxxxxxxxx
get_adjacency_plots(complete_edges, complete_index)
xxxxxxxxxx
get_laplacian_plots(complete_edges, complete_index)
Remark: our eigenvector methods do not give such great pictures of the complete graph on
xxxxxxxxxx
begin
connected_edges, connected_index = connected_random_graph(10, 0.1, 10000)
plot_graph_random(connected_edges, connected_index)
end
xxxxxxxxxx
get_adjacency_plots(connected_edges, connected_index)
xxxxxxxxxx
get_laplacian_plots(connected_edges, connected_index)
"Kwai Fong"
"Kwai Hing"
"Lai King"
"Kwun Tong"
"Ngau Tau Kok"
"Lam Tin"
"Whampoa"
"Ho Man Tin"
"Ngau Tau Kok"
"Kowloon Bay"
"Kwun Tong"
"Mei Foo"
"Tsuen Wan West"
"Lai Chi Kok"
"Lai King"
"Nam Cheong"
"Long Ping"
"Tin Shui Wai"
"Yuen Long"
"Tung Chung"
"Sunny Bay"
"Chai Wan"
"Heng Fa Chuen"
"Austin"
"East Tsim Sha Tsui"
"Nam Cheong"
"Tai Wai"
"Kowloon Tong"
"Sha Tin"
"Hin Keng"
"Che Kung Temple"
"Heng On"
"Tai Shui Hang"
"Ma On Shan"
"Tuen Mun"
"Siu Hong"
"Central"
"Hong Kong"
"Sheung Wan"
"Admiralty"
"Shek Kip Mei"
"Kowloon Tong"
"Prince Edward"
"Kwai Hing"
"Kwai Fong"
"Tai Wo Hau"
"Sai Ying Pun"
"Sheung Wan"
"HKU"
"Wan Chai"
"Causeway Bay"
"Admiralty"
"Diamond Hill"
"Kai Tak"
"Hin Keng"
"Wong Tai Sin"
"Choi Hung"
"Ocean Park"
"Wong Chuk Hang"
"Admiralty"
"Wong Chuk Hang"
"Ocean Park"
"Lei Tung"
"AsiaWorld–Expo"
"Airport"
"North Point"
"Fortress Hill"
"Quarry Bay"
"Sai Wan Ho"
"Tai Koo"
"Shau Kei Wan"
"Kowloon Bay"
"Ngau Tau Kok"
"Choi Hung"
"Nam Cheong"
"Olympic"
"Lai King"
"Austin"
"Mei Foo"
"Sheung Shui"
"Lo Wu"
"Lok Ma Chau"
"Fanling"
"Tsim Sha Tsui"
"East Tsim Sha Tsui"
"Jordan"
"Admiralty"
"Quarry Bay"
"North Point"
"Yau Tong"
"Tai Koo"
"Yau Tong"
"Tiu Keng Leng"
"Quarry Bay"
"Lam Tin"
"Lai Chi Kok"
"Mei Foo"
"Cheung Sha Wan"
"Mei Foo"
5
"Kwai Fong"
43
"Kwun Tong"
45
"Ngau Tau Kok"
54
"Whampoa"
94
"Long Ping"
50
"Tung Chung"
93
"Austin"
23
"Chai Wan"
82
"Tai Wai"
7
"Heng On"
35
"Tuen Mun"
92
"Central"
9
"Kwai Hing"
44
"Sai Ying Pun"
60
"Shek Kip Mei"
64
"Wan Chai"
77
"Diamond Hill"
2
"Ocean Park"
56
"Wong Chuk Hang"
78
"AsiaWorld–Expo"
81
"North Point"
55
"Sai Wan Ho"
59
"Kowloon Bay"
42
"Nam Cheong"
6
"Sheung Shui"
15
"Quarry Bay"
13
"Tsim Sha Tsui"
18
"Yau Tong"
21
"Lai Chi Kok"
46
xxxxxxxxxx
mtr_edges, mtr_index = mtr_graph(2020) # MTR system as of 2020
95
xxxxxxxxxx
length(mtr_index) # number of stations
"Yau Tong"
"Tseung Kwan O"
xxxxxxxxxx
mtr_edges["Tiu Keng Leng"] # stations adjacent to Tiu Keng Leng, for example
xxxxxxxxxx
plot_graph_random(mtr_edges, mtr_index)
xxxxxxxxxx
get_adjacency_plots(mtr_edges, mtr_index)
xxxxxxxxxx
get_laplacian_plots(mtr_edges, mtr_index)
The bottom left graph vaguely resembles the actual MTR map.
plot_mtr_graph (generic function with 1 method)
xxxxxxxxxx
function plot_mtr_graph(year)
edges, index = mtr_graph(year)
plot_graph_laplacian_low(edges, index)
end
"Lo Wu"
14
"Kwun Tong"
13
"Ngau Tau Kok"
7
"Hung Hom"
12
"Kowloon Tong"
1
"Fanling"
11
"Shek Kip Mei"
16
"University"
17
"Wong Tai Sin"
10
"Mong Kok East"
6
"Diamond Hill"
3
"Lok Fu"
5
"Sha Tin"
15
"Kowloon Bay"
4
"Choi Hung"
2
"Racecourse"
8
"Sheung Shui"
9
xxxxxxxxxx
mtr_graph(1979)[2] # stations open in 1979
xxxxxxxxxx
plot_mtr_graph(1979)
xxxxxxxxxx
plot_mtr_graph(1981)
xxxxxxxxxx
plot_mtr_graph(1985)
xxxxxxxxxx
plot_mtr_graph(1990)
xxxxxxxxxx
plot_mtr_graph(1998) # we can see some major extension to the MTR system in 1998
xxxxxxxxxx
plot_mtr_graph(2002)
xxxxxxxxxx
plot_mtr_graph(2005)
xxxxxxxxxx
plot_mtr_graph(2007)
xxxxxxxxxx
plot_mtr_graph(2009)
xxxxxxxxxx
plot_mtr_graph(2017)
xxxxxxxxxx
plot_mtr_graph(2020)
Another application:
Take the eigenvector of the adjacency matrix
with largest eigenvalue.This vector can be rescaled to have all nonpositive entries.
These entries determine an order on the vertices of your graph.
This ordering (approximately) will be put important vertices first, unimportant ones last.
rank_vertices (generic function with 1 method)
xxxxxxxxxx
function rank_vertices(edges, index)
A = graph_adjacency_matrix(edges, index)
vec = eigen(A).vectors[:, end]
# rescale to have all nonpositive entries
if maximum(vec) > 1e-12
vec = -vec
end
maximum(vec) <= 1e-12
rind = reverse_index(index)
ranks = [rind[i] for i = sortperm(vec)]
scores = Dict(key => vec[i] / minimum(vec) for (key, i) in index)
return ranks, scores
end
Applying this strategy gives the following ranking of MTR stations, from most important to least important.
Does Lai King make sense as #1?
Certainly Chai Wan, Po Lam, and We Kai Sha are plausible as the least important stations, since each is located at the end of its respective line.
"Lai King"
"Nam Cheong"
"Mei Foo"
"Tsing Yi"
"Olympic"
"Kowloon"
"Kwai Fong"
"Austin"
"Lai Chi Kok"
"Tsuen Wan West"
"Sunny Bay"
"Airport"
"East Tsim Sha Tsui"
"Hong Kong"
"Kwai Hing"
"Cheung Sha Wan"
"Kam Sheung Road"
"Tsim Sha Tsui"
"Central"
"Admiralty"
"Lo Wu"
"Lok Ma Chau"
"Shau Kei Wan"
"Ma On Shan"
"Hang Hau"
"LOHAS Park"
"Heng Fa Chuen"
"Wu Kai Sha"
"Po Lam"
"Chai Wan"
"Mei Foo"
0.903978
"Kwai Fong"
0.392665
"Kwun Tong"
0.000229309
"Ngau Tau Kok"
0.000494898
"Whampoa"
0.00392845
"Long Ping"
0.0215197
"Tung Chung"
0.104997
"Austin"
0.391578
"Chai Wan"
4.53137e-6
"Tai Wai"
0.0213396
"Heng On"
7.81581e-5
"Tuen Mun"
0.00110401
"Central"
0.126188
"Kwai Hing"
0.15378
"Sai Ying Pun"
0.0194052
"Shek Kip Mei"
0.0239
"Wan Chai"
0.0464861
"Diamond Hill"
0.00789618
"Ocean Park"
0.0464627
"Wong Chuk Hang"
0.0181963
"AsiaWorld–Expo"
0.0912432
"North Point"
0.0011613
"Sai Wan Ho"
8.83265e-5
"Kowloon Bay"
0.00122487
"Nam Cheong"
0.945156
"Sheung Shui"
0.00010659
"Quarry Bay"
0.000572624
"Tsim Sha Tsui"
0.128545
"Yau Tong"
0.000296319
"Lai Chi Kok"
0.355898
xxxxxxxxxx
ranks, scores = rank_vertices(mtr_edges, mtr_index)
Here is a method to plot the relative 'eigenvector centrality' of various MTR stations as they evolve over time.
plot_ranking (generic function with 1 method)
xxxxxxxxxx
function plot_ranking(stations)
ans = plot(
title="Eigenvector centrality of MTR stations",
legend = :outertopleft,
xlabel = "Year"
)
for station = stations
series = []
years = []
for year=1910:2020
edges, index = mtr_graph(year)
if haskey(index, station)
rank, score = rank_vertices(edges, index)
rind = reverse_index(Dict(i => rank[i] for i=1:length(rank)))
append!(series, 1.0 - (rind[station] - 1) / (length(index) - 1))
append!(years, year)
end
end
plot!(years, series, label=station, ylim=(0.0, 1.0))
end
return ans
end
xxxxxxxxxx
plot_ranking(["Choi Hung"])
xxxxxxxxxx
plot_ranking(["Kowloon Tong", "Lai King", "Central", "North Point", "Diamond Hill"])
Some observations:
Central continues to become more central to the network.
In the early history of the MTR, Kowloon Tong was the most important station.
Lai King jumps ahead in centrality after the Airport Express line opens (on 6 July 1998).
Increased centrality of Diamond Hill station in 2020 probably due to new Kai Tak station.
One more application. An
such that consecutive edges share an endpoint.
The path starts at vertex
Taking powers of the adjacency matrix lets us count all
The number of 1-step paths between vertices
Similarly, the number of
xxxxxxxxxx
plot_graph_laplacian_low(test_edges, test_index)
4
3
5
6
2
1
3
5
3
2
4
5
1
2
4
6
4
1
2
5
xxxxxxxxxx
test_edges
6×6 Array{Int64,2}: 0 1 0 0 1 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0
6×6 Array{Int64,2}: 2 1 1 1 1 0 1 3 0 2 1 0 1 0 2 0 2 1 1 2 0 3 0 0 1 1 2 0 3 1 0 0 1 0 1 1
6×6 Array{Int64,2}: 2 4 2 2 4 1 4 2 5 1 6 2 2 5 0 5 1 0 2 1 5 0 6 3 4 6 1 6 2 0 1 2 0 3 0 0
6×6 Array{Int64,2}: 8 8 6 7 8 2 8 15 3 13 7 1 6 3 10 1 12 5 7 13 1 14 3 0 8 7 12 3 16 6 2 1 5 0 6 3
xxxxxxxxxx
A, A^2, A^3, A^4
3
xxxxxxxxxx
# the number of 3-step paths from vertex 6 to vertex 4 in the test graph
(A^3)[4, 6]
6
xxxxxxxxxx
# the number of 4-step paths from vertex 6 to vertex 5 in the test graph
(A^4)[5, 6]
Consider the following modified adjacency matrix
degree_normalized_adjacency_matrix (generic function with 1 method)
xxxxxxxxxx
function degree_normalized_adjacency_matrix(edges, index)
A = graph_adjacency_matrix(edges, index)
D = Diagonal(A * ones(Int64, length(index)))
return A * D^-1
end
The entries in each column of
These probabilities give your chances of being at a givex vertex if you carry out the following random walk on your graph:
Start at vertex
.Choose an adjacent vertex (one connected by an edge) at random and move to that vertex.
Repeat
times.
We can plot this distribution.
path_distribution (generic function with 1 method)
xxxxxxxxxx
function path_distribution(edges, index, i, n)
DNA = degree_normalized_adjacency_matrix(edges, index)
ans = (DNA^n)[:, index[i]]
rind = reverse_index(index)
return ans, [rind[i] => ans[i] for i = reverse(sortperm(ans))]
end
As
Key fact: this limiting distribution is an eigenvector for
If the
It can happen that this eigenspace has dimension greater than one. In this case more than one limiting distribution may exist for our random walk, and which one we end up in depends on the vertex the walk starts at.
limiting_distribution (generic function with 1 method)
xxxxxxxxxx
function limiting_distribution(edges, index, stepsize)
DNA = degree_normalized_adjacency_matrix(edges, index)
eig = eigen(DNA^stepsize)
limit = eig.vectors[:, length(eig.values)]
limit = abs.(limit / sum(limit))
is_unique = length([i for i=eig.values if abs(1.0 - i) < 5e-15]) <= 1
return limit, is_unique
end
distribution_animation (generic function with 1 method)
xxxxxxxxxx
function distribution_animation(edges_index, start, steps, stepsize, title)
edges, index = edges_index
limit, is_unique = limiting_distribution(edges, index, stepsize)
anim = for i=1:stepsize:steps
dist, _ = path_distribution(edges, index, start, i)
plot(dist,
label="Distribution after $(i) steps",
title="Convergence to limit distribution for walk on " * title,
color=:blue,
ylim = (0.0, maximum(limit) + 0.05)
)
if is_unique
plot!(limit, color=:orange, label="Limiting distribution")
end
end every 1
gif(anim, "anim_fps15.gif", fps = 5)
end
xxxxxxxxxx
distribution_animation((test_edges, test_index), 1, 30, 2, "test graph")
xxxxxxxxxx
plot_graph_laplacian_low(path_graph(20)[1], path_graph(20)[2])
On the path graph, there are multiple limiting distributions:
xxxxxxxxxx
distribution_animation(path_graph(20), 10, 200, 4, "path graph")
xxxxxxxxxx
distribution_animation(path_graph(20), 1, 800, 8, "path graph")
xxxxxxxxxx
plot_graph_laplacian_low(circle_graph(10)[1], circle_graph(10)[2])
xxxxxxxxxx
distribution_animation(circle_graph(10), 5, 50, 2, "circle graph")
xxxxxxxxxx
distribution_animation(circle_graph(10), 4, 50, 2, "circle graph")
For circle graphs with an even number of vertices, there are also multiple limiting distributions, as illustrated in the plots above.
However, for circle graphs with an odd number of vertices, there is a unique limiting distribution for our random walk, namely, the uniform distribution:
xxxxxxxxxx
distribution_animation(circle_graph(9), 5, 120, 4, "circle graph")
xxxxxxxxxx
plot_graph_laplacian_low(mtr_edges, mtr_index)
There is also a unique limiting distribution for the random walk on the MTR graph:
xxxxxxxxxx
distribution_animation(mtr_graph(2020), "Chai Wan", 2000, 40, "MTR graph")
This distribution is actually very simple: the limiting probabilities are proportional to the degrees of the vertices in the graph! We check this below:
graph_degree (generic function with 1 method)
xxxxxxxxxx
function graph_degree(edges, index, i)
return sum(graph_adjacency_matrix(edges, index)[:, index[i]])
end
plot_degree_distribution (generic function with 1 method)
xxxxxxxxxx
function plot_degree_distribution(edges, index, title)
degrees = zeros(length(index), 1)
for (s, i)=index
degrees[i] = graph_degree(edges, index, s)
end
distribution = degrees / sum(degrees)
plot(title="Vertex degrees for " * title * " divided by number of edges",
distribution, ylim=(0.0, maximum(distribution) + 0.05), legend=false)
end
xxxxxxxxxx
plot_degree_distribution(mtr_edges, mtr_index, "MTR graph")
This a general phenomenon. Compare the following with the limiting distribution plotted above:
xxxxxxxxxx
plot_degree_distribution(test_edges, test_index, "test graph")
One more interesting phenomenon: even if there is a unique limiting distribution, how fast our random walk converges to this distribution can still vary signficantly depending on which vertex the walk starts at.
plot_convergence (generic function with 1 method)
xxxxxxxxxx
function plot_convergence(edges, index, starts, steps, stepsize)
limit, is_unique = limiting_distribution(edges, index, stepsize)
is_unique
p = plot(
title="Log-mean-square distance to limiting distribution",
xlabel="Steps"
)
for start=starts
convergence = []
for i=1:stepsize:steps
dist, _ = path_distribution(edges, index, start, i)
append!(convergence, norm(dist - limit))
end
plot!(log.(convergence), label="Random walk starts at " * string(start))
end
return p
end
xxxxxxxxxx
plot_convergence(mtr_edges, mtr_index,
["Lai King", "Central", "North Point", "Sha Tin"], 800, 2)
xxxxxxxxxx
plot_convergence(test_edges, test_index, [1, 2, 3, 4, 5, 6], 40, 2)