# 数据库设计与实现

数据库使用 MySQL.

# 基础数据表

因为系统没有实现管理员系统, 所以 legal 字段没有发挥实际的作用, 但是应该被正确地设置.

spider 字段在爬虫中起效, 本质上是原来今日头条的数据主键值.

# Customer 用户表

cus_id INT UNSIGNED NOT NULL auto_increment,
cus_name VARCHAR(64) UNIQUE,
cus_pass VARCHAR(255),
-- 爬虫中用于识别用户
cus_spider VARCHAR(64) default '',
-- 用户头像的 url
cus_avatar_url VARCHAR(255) default 'http://localhost:8080/img/Man.png',
-- 用户的个人描述
cus_style VARCHAR(255) default '这个人很懒, 什么都没写',
-- cus_gender 为 0 时性别未知, 为 1 时为男, 为 -1 时为女
cus_gender TINYINT DEFAULT 0,
-- 用户的创建时间
cus_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- cus_legal 为 0 时待审核, 为 1 时合法, 为 -1 不合法
cus_legal TINYINT default 0,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

系统不允许用户创建同名用户. 这张表也就前四个字段比较有用, 剩下的基本都是残废.

# Article 新闻表

art_id INT UNSIGNED NOT NULL auto_increment,
art_title VARCHAR(255) default '',
art_content TEXT,
-- 在爬虫中分辨文章
art_spider VARCHAR(64) default '',
-- 文章的分类
art_type VARCHAR(32),
-- 文章的标签 应该以 & 分隔
art_tags VARCHAR(128) default '',
-- 文章缩略图的信息
art_image_url VARCHAR(255) default '',
art_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
art_legal tinyint default 0,
art_cus_id INT UNSIGNED,
1
2
3
4
5
6
7
8
9
10
11
12
13
14

这里 art_tags 计划使用来储存新闻的关键词, 后来没用上.

# Comment 评论表

com_id INT UNSIGNED NOT NULL auto_increment,
com_content TEXT,
com_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
com_legal tinyint default 0,
-- 爬虫过程中的评论标识
com_spider varchar(64) default '',
com_cus_id INT UNSIGNED,
com_art_id INT UNSIGNED, 
1
2
3
4
5
6
7
8

# Reply 回复表

rep_id INT UNSIGNED NOT NULL auto_increment,
rep_content TEXT,
-- 回复的类型, 0 是对评论的回复, 1 是对回复的回复
rep_type tinyint default 0, 
rep_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
rep_legal tinyint default 0,
-- 爬虫过程中的评论标识
rep_spider varchar(64) default '',
rep_cus_id INT UNSIGNED,
rep_art_id INT UNSIGNED,
rep_com_id INT UNSIGNED, 
rep_rep_id INT UNSIGNED,
1
2
3
4
5
6
7
8
9
10
11
12

回复需要考虑对评论的回复与对回复的回复. 当 rep_type 为 0 时, rep_rep_id 将为空.

# 数据统计与行为记录表

# CusBehaviorRecord 用户行为记录表

这是整个数据库中最复杂的表, 记录了所有的用户行为.

cbr_id INT UNSIGNED NOT NULL auto_increment,
-- 用户 攻
cbr_cus_id_from INT UNSIGNED,
-- 用户 受
cbr_cus_id_to INT UNSIGNED,
-- 行为类别
cbr_behavior INT default 0,
-- 行为时间
cbr_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- 行为发生的文章 ID
cbr_art_id INT UNSIGNED,
-- 行为发生的位置代号, 0: 无发生, 1: 文章, 2: 评论, 3:回复
cbr_type TINYINT DEFAULT 0,
-- target ID
cbr_target_id INT UNSIGNED,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

# ArtFeatureCount 新闻特征统计记录表

这张表统计了每篇新闻下的用户行为数据.

afc_like_num INT UNSIGNED default 0,
afc_dislike_num INT UNSIGNED default 0,
afc_com_num INT UNSIGNED default 0,
afc_rep_num INT UNSIGNED default 0,
afc_read_num INT UNSIGNED default 0,
afc_art_time TIMESTAMP,
1
2
3
4
5
6

其中, afc_like_numafc_dislike_num 即新闻的点赞与点踩数量没有被使用.

# CusFeatureCount 用户特征统计记录表

这张表记录了每个用户在 18 种不同类别下的用户行为数量.

cfc_news_society INT UNSIGNED default 0,
cfc_news_entertainment INT UNSIGNED default 0,
cfc_news_tech INT UNSIGNED default 0,
cfc_news_military INT UNSIGNED default 0,
cfc_news_sports INT UNSIGNED default 0,
cfc_news_finance INT UNSIGNED default 0,
cfc_news_world INT UNSIGNED default 0,
cfc_news_fashion INT UNSIGNED default 0,
cfc_news_travel INT UNSIGNED default 0,
cfc_news_discovery INT UNSIGNED default 0,
cfc_news_baby INT UNSIGNED default 0,
cfc_news_regimen INT UNSIGNED default 0,
cfc_news_story INT UNSIGNED default 0,
cfc_news_essay INT UNSIGNED default 0,
cfc_news_game INT UNSIGNED default 0,
cfc_news_history INT UNSIGNED default 0,
cfc_news_food INT UNSIGNED default 0,
cfc_news_car INT UNSIGNED default 0,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

# 数据视图

这个我要非常感谢我同学也是我同事 -- "鲁大师" 的建议, 利用视图来计算新闻的排分. 系统只设计了两个数据视图, 实际上需要更多以适配不同的推荐场景.

# ArtScoreList 通用新闻排分表

DROP VIEW IF EXISTS NR.ArtScoreList;
create view NR.ArtScoreList(asl_art_id, asl_art_score) as
    select
        afc_art_id,
        timestampdiff(HOUR,now(),afc_art_time)*20 + cast(afc_like_num as signed)*4 +
        cast(afc_dislike_num as signed)*(-6) + cast(afc_com_num as signed)*3+
        cast(afc_read_num as signed )*1 + cast(afc_rep_num as signed)*2 as afc_art_socre
    from NR.ArtFeatureCount
    order by afc_art_socre desc;
1
2
3
4
5
6
7
8
9

# ArtTimeList 热点新闻排分表

DROP VIEW IF EXISTS NR.ArtTimeList;
create view NR.ArtTimeList(atl_art_id, atl_art_score) as 
    SELECT
        afc_art_id,
        timestampdiff(HOUR,now(),afc_art_time)*32 + cast(afc_like_num as signed)*4 + 
        cast(afc_dislike_num as signed)*(-6) + cast(afc_com_num as signed)*3 + 
        cast(afc_read_num as signed )*1 + cast(afc_rep_num as signed )*2 as atl_art_score
    from NR.ArtFeatureCount left join NR.Article on NR.ArtFeatureCount.afc_art_id = NR.Article.art_id
    where
        NR.Article.art_type in ("news_society", "news_military", "news_finance", "news_entertainment",
                                "news_game", "news_sports", "news_world", "news_tech", "news_car", "news_fashion") and
        abs(timestampdiff(HOUR,now(),afc_art_time)) < 72
    order by atl_art_score desc;
1
2
3
4
5
6
7
8
9
10
11
12
13
Last Updated: 6/3/2020, 12:36:35 AM