Statistical-Learning-Method.../Page_Rank/Page_Rank.ipynb
2021-01-26 17:09:38 +08:00

171 lines
4.7 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PageRank算法\n",
"以下图所示的有向图为例计算每个结点的PR\n",
"<img style=\"float: center;\" src=\"directed_graph.png\" width=\"20%\">"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"\n",
"n = 7 #有向图中一共有7个节点\n",
"d = 0.85 #阻尼因子根据经验值确定,这里我们随意给一个值\n",
"M = np.array([[0, 1/4, 1/3, 0, 0, 1/2, 0],\n",
" [1/4, 0, 0, 1/5, 0, 0, 0],\n",
" [0, 1/4, 0, 1/5, 1/4, 0, 0],\n",
" [0, 0, 1/3, 0, 1/4, 0, 0],\n",
" [1/4, 0, 0, 1/5, 0, 0, 0],\n",
" [1/4, 1/4, 0, 1/5, 1/4, 0, 0],\n",
" [1/4, 1/4, 1/3, 1/5, 1/4, 1/2, 0]]) #根据有向图中各节点的连接情况写出转移矩阵\n",
"R0 = np.full((7, 1), 1/7) #设置初始向量R0R0是一个7*1的列向量因为有7个节点我们把R0的每一个值都设为1/7\n",
"eps = 0.000001 #设置计算精度"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. PageRank的迭代算法"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"t = 0 #用来累计迭代次数\n",
"R = R0 #对R向量进行初始化\n",
"judge = False #用来判断是否继续迭代\n",
"while not judge:\n",
" next_R = d * np.matmul(M, R) + (1 - d) / n * np.ones((7, 1)) #计算新的R向量\n",
" diff = np.linalg.norm(R - next_R) #计算新的R向量与之前的R向量之间的距离这里采用的是欧氏距离\n",
" if diff < eps: #若两向量之间的距离足够小\n",
" judge = True #则停止迭代\n",
" R = next_R #更新R向量\n",
" t += 1 #迭代次数加一\n",
"R = R / np.sum(R) #对R向量进行规范化保证其总和为1表示各节点的概率分布"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"迭代次数: 24\n",
"PageRank: \n",
" [[0.17030305]\n",
" [0.10568394]\n",
" [0.11441021]\n",
" [0.10629792]\n",
" [0.10568394]\n",
" [0.15059975]\n",
" [0.24702119]]\n"
]
}
],
"source": [
"print('迭代次数:', t)\n",
"print('PageRank: \\n', R)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. PageRank的幂法"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"t = 0 #用来累计迭代次数\n",
"x = R0 #对x向量进行初始化\n",
"judge = False #用来判断是否继续迭代\n",
"A = d * M + (1 - d) / n * np.eye(n) #计算A矩阵其中np.eye(n)用来创建n阶单位阵E\n",
"while not judge:\n",
" next_y = np.matmul(A, x) #计算新的y向量\n",
" next_x = next_y / np.linalg.norm(next_y) #对新的y向量规范化得到新的x向量\n",
" diff = np.linalg.norm(x - next_x) #计算新的x向量与之前的x向量之间的距离这里采用的是欧氏距离\n",
" if diff < eps: #若两向量之间的距离足够小\n",
" judge = True #则停止迭代\n",
" R = x #得到R向量\n",
" x = next_x #更新x向量\n",
" t += 1 #迭代次数加一\n",
"R = R / np.sum(R) #对R向量进行规范化保证其总和为1表示各节点的概率分布"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"迭代次数: 25\n",
"PageRank: \n",
" [[0.18860772]\n",
" [0.09038084]\n",
" [0.0875305 ]\n",
" [0.07523049]\n",
" [0.09038084]\n",
" [0.15604764]\n",
" [0.31182196]]\n"
]
}
],
"source": [
"print('迭代次数:', t)\n",
"print('PageRank: \\n', R)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}