简介:本文深入探讨MySQL搜索关键词表的设计原则、核心字段及高效查询指令,提供完整的建表方案和性能优化策略,帮助开发者构建高性能的搜索系统。
搜索关键词表作为搜索系统的核心数据载体,其设计需遵循以下原则:
CREATE TABLE `search_keywords` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`keyword` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '原始关键词',
`normalized_keyword` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '标准化后的关键词',
`search_count` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '搜索次数',
`last_searched_at` timestamp NULL DEFAULT NULL COMMENT '最后搜索时间',
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_normalized_keyword` (`normalized_keyword`),
KEY `idx_search_count` (`search_count`),
KEY `idx_last_searched` (`last_searched_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- 热词查询(TOP 100)
SELECT normalized_keyword, search_count
FROM search_keywords
ORDER BY search_count DESC
LIMIT 100;
-- 关键词模糊匹配
SELECT DISTINCT normalized_keyword
FROM search_keywords
WHERE normalized_keyword LIKE 'mysql%'
ORDER BY search_count DESC
LIMIT 10;
-- 使用全文索引实现关联建议
ALTER TABLE search_keywords ADD FULLTEXT INDEX `ft_idx_keyword` (`normalized_keyword`);
SELECT normalized_keyword, MATCH(normalized_keyword) AGAINST('+mysql* data*' IN BOOLEAN MODE) AS relevance
FROM search_keywords
WHERE MATCH(normalized_keyword) AGAINST('+mysql* data*' IN BOOLEAN MODE)
ORDER BY relevance DESC, search_count DESC
LIMIT 5;
-- 使用窗口函数实现实时排名
SELECT
normalized_keyword,
search_count,
RANK() OVER (ORDER BY search_count DESC) AS hot_rank
FROM search_keywords
WHERE last_searched_at > DATE_SUB(NOW(), INTERVAL 1 HOUR)
ORDER BY hot_rank
LIMIT 50;
ALTER TABLE search_keywords ADD INDEX `idx_composite` (`normalized_keyword`, `search_count`);
-- 使用MySQL查询缓存(8.0以下版本)
SELECT SQL_CACHE normalized_keyword, search_count
FROM search_keywords
WHERE normalized_keyword LIKE 'database%';
-- 应用层缓存实现
/* Redis缓存示例 */
SETEX keyword:mysql 3600 "MySQL相关搜索数据"
-- 使用触发器实现搜索数据实时分析
DELIMITER //
CREATE TRIGGER after_keyword_update
AFTER UPDATE ON search_keywords
FOR EACH ROW
BEGIN
IF NEW.search_count % 100 = 0 THEN
INSERT INTO keyword_trend_analysis(keyword_id, search_count, period)
VALUES(NEW.id, NEW.search_count, NOW());
END IF;
END//
DELIMITER ;
-- 带商品类目关联的搜索词表
CREATE TABLE `product_search_log` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`keyword_id` bigint(20) unsigned NOT NULL,
`category_id` int(11) DEFAULT NULL,
`user_id` int(11) DEFAULT NULL COMMENT '去标识化用户ID',
`device_type` enum('mobile','desktop','tablet') DEFAULT NULL,
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `idx_keyword_category` (`keyword_id`,`category_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- 实现时间衰减的热度计算
UPDATE search_keywords
SET search_count = search_count * POW(0.9, DATEDIFF(NOW(), last_searched_at))
WHERE last_searched_at < DATE_SUB(NOW(), INTERVAL 7 DAY);
-- 碎片整理
OPTIMIZE TABLE search_keywords;
-- 冷数据归档
INSERT INTO search_keywords_archive
SELECT * FROM search_keywords
WHERE last_searched_at < DATE_SUB(NOW(), INTERVAL 1 YEAR);
通过本文的详细设计方案,开发者可以构建出支持千万级搜索关键词的高性能MySQL存储系统。实际实施时需根据具体业务需求调整字段设计和查询策略,建议先在小规模数据上进行验证测试。