MySql分组后随机获取每组一条数据的操作
思路:先随机排序然后再分组就好了。 1、创建表: CREATE TABLE xdx_test
( id
int(11) NOT NULL, name
varchar(255) DEFAULT NULL, class
varchar(255) DEFAULT NULL, PRIMARY KEY (id
)) ENGINE=InnoDB DEFAULT CHARSET=utf
思路:先随机排序然后再分组就好了。
1、创建表:
-- Demo table for the examples: one row per person, with `class` as the
-- grouping column.
CREATE TABLE `xdx_test` (
    `id`    INT(11)      NOT NULL,
    `name`  VARCHAR(255) DEFAULT NULL,
    `class` VARCHAR(255) DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
2、插入数据
-- Seed data: three classes ('1', '2', '3') with three people each.
-- The column list is explicit so the statement keeps working if columns are
-- added to or reordered in xdx_test.
INSERT INTO xdx_test (id, name, class)
VALUES
    (1, '张三-1', '1'),
    (2, '李四-1', '1'),
    (3, '王五-1', '1'),
    (4, '张三-2', '2'),
    (5, '李四-2', '2'),
    (6, '王五-2', '2'),
    (7, '张三-3', '3'),
    (8, '李四-3', '3'),
    (9, '王五-3', '3');
3、查询语句
-- Pick one random row per class: shuffle all rows in a derived table, then let
-- GROUP BY collapse each class to a single row (MySQL keeps an arbitrary row of
-- the group, in practice the first one it reads).
-- Caveats:
--   * Selecting non-aggregated columns this way requires ONLY_FULL_GROUP_BY to
--     be disabled in sql_mode.
--   * MySQL 5.7+ ignores ORDER BY inside a derived table when the outer query
--     is grouped, so the pick may stop being random there. On MySQL 8.0+ use
--     ROW_NUMBER() OVER (PARTITION BY class ORDER BY RAND()) and keep row 1.
SELECT *
FROM (
    SELECT *
    FROM xdx_test
    ORDER BY RAND()
) AS a
GROUP BY a.class;
4、查询结果
3 王五-1 1
5 李四-2 2
9 王五-3 3
3 王五-1 1
4 张三-2 2
7 张三-3 3
2 李四-1 1
5 李四-2 2
8 李四-3 3
补充知识:MySQL实现随机获取几条数据的方法(效率和离散性比较)
下面比较几种SQL写法的效率和离散性
1:SELECT * FROM tablename ORDER BY RAND() LIMIT 想要获取的数据条数;
2:SELECT * FROM `table` WHERE id >= (SELECT FLOOR( MAX(id) * RAND()) FROM `table` ) ORDER BY id LIMIT 想要获取的数据条数;
3:SELECT * FROM `table` AS t1 JOIN (SELECT ROUND(RAND() * (SELECT MAX(id) FROM `table`)) AS id) AS t2 WHERE t1.id >= t2.id
ORDER BY t1.id ASC LIMIT 想要获取的数据条数;
4:SELECT * FROM `table` WHERE id >= (SELECT floor(RAND() * (SELECT MAX(id) FROM `table`))) ORDER BY id LIMIT 想要获取的数据条数;
5:SELECT * FROM `table` WHERE id >= (SELECT floor( RAND() * ((SELECT MAX(id) FROM `table`)-(SELECT MIN(id) FROM `table`)) + (SELECT MIN(id) FROM `table`))) ORDER BY id LIMIT 想要获取的数据条数;
6:SELECT * FROM `table` AS t1 JOIN (SELECT ROUND(RAND() * ((SELECT MAX(id) FROM `table`)-(SELECT MIN(id) FROM `table`))+(SELECT MIN(id) FROM `table`)) AS id) AS t2 WHERE t1.id >= t2.id ORDER BY t1.id LIMIT 想要获取的数据条数;
1的查询时间>>2的查询时间>>5的查询时间>6的查询时间>4的查询时间>3的查询时间,也就是3的效率最高。
以上6种只是单纯的从效率上做了比较;
上面的6种随机数抽取可分为2类:
第一种的离散性比较高,但是效率低;其他5种效率都比较高,但是存在离散性不高的问题;
怎样才能同时满足效率和离散性的要求呢?
我们有一个思路就是: 写一个存储过程;
-- Fetch one pseudo-random row: draw a pivot id between MIN(id) and MAX(id),
-- then take the row with the smallest id at or after the pivot.
-- ORDER BY makes "the first row at or after the pivot" explicit; without it
-- LIMIT 1 may return any row that satisfies the join condition.
SELECT *
FROM test AS t1
INNER JOIN (
    SELECT ROUND(
        RAND() * ((SELECT MAX(id) FROM test) - (SELECT MIN(id) FROM test))
        + (SELECT MIN(id) FROM test)
    ) AS id
) AS t2
    ON t1.id >= t2.id
ORDER BY t1.id
LIMIT 1;
每次取出一条,然后循环写入一张临时表中;最后返回 select 临时表就OK;
这样既保证了效率,又解决了离散性的问题,兼具二者的优点;
下面是具体存储过程的伪代码
DROP PROCEDURE IF EXISTS evaluate_Check_procedure;

DELIMITER ;;

-- evaluate_Check_procedure
--
-- Returns up to checkNum distinct, randomly chosen rows of xdr_authen whose
-- PROCEDURE_START_TIME lies in [startTime, endTime) and whose INTERFACE is one
-- of the values listed in evaInterface.
--
-- Parameters:
--   startTime     inclusive lower bound for PROCEDURE_START_TIME
--   endTime       exclusive upper bound for PROCEDURE_START_TIME
--   checkNum      number of random rows wanted
--   evaInterface  comma-separated list of bare integer INTERFACE values,
--                 e.g. '1,2,3' (spaces are tolerated)
--
-- Result set: the 28 xdr_authen columns listed in the final SELECT.
-- Fewer than checkNum rows are returned when not enough distinct rows match
-- or when the retry budget is used up.
--
-- How it works: each iteration draws a random pivot id inside the id range of
-- the matching rows and takes the first matching row at or after the pivot.
-- Only the chosen ids are stored in the temporary table; the full rows are
-- read once at the end. Rows that follow a gap in the id sequence are
-- somewhat more likely to be chosen.
--
-- NOTE(review): assumes xdr_authen.ID is unique on its own (the original
-- de-duplicated on ID only) -- confirm against the real table definition.
CREATE DEFINER=`root`@`%` PROCEDURE evaluate_Check_procedure(
    IN startTime    DATETIME,
    IN endTime      DATETIME,
    IN checkNum     INT,
    IN evaInterface VARCHAR(36)
)
BEGIN
    -- Locals carry a v_ prefix so they can never shadow a column name inside
    -- the SQL statements below.
    DECLARE v_picked_count  INT DEFAULT 0;     -- distinct rows collected so far
    DECLARE v_attempt_count BIGINT DEFAULT 0;  -- iterations executed so far
    DECLARE v_max_attempts  BIGINT DEFAULT 0;  -- hard cap on iterations
    DECLARE v_min_id        BIGINT;            -- smallest matching ID
    DECLARE v_max_id        BIGINT;            -- largest matching ID
    DECLARE v_pivot_id      BIGINT;            -- random starting point
    DECLARE v_candidate_id  BIGINT;            -- ID found for this iteration
    DECLARE v_interface_csv VARCHAR(36);       -- evaInterface without spaces

    -- A probe that finds no row (e.g. rows deleted concurrently) must not
    -- abort the procedure; it simply yields no candidate.
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET v_candidate_id = NULL;

    -- Temporary table holding the IDs of the rows picked so far. It is
    -- recreated on every call so stale data from an earlier, aborted call in
    -- the same session cannot leak into this result.
    DROP TEMPORARY TABLE IF EXISTS xdr_authen_tmp;
    CREATE TEMPORARY TABLE xdr_authen_tmp (
        ID BIGINT(20) NOT NULL COMMENT 'ID of a picked xdr_authen row',
        PRIMARY KEY (ID)
    ) ENGINE = InnoDB;

    -- FIND_IN_SET does not ignore blanks, so normalise '1, 2' to '1,2'.
    -- Using it keeps evaInterface a plain value instead of concatenating it
    -- into dynamic SQL.
    SET v_interface_csv = REPLACE(evaInterface, ' ', '');

    -- Bound the loop: with fewer than checkNum distinct matching rows the
    -- target can never be reached, so give up after 20 tries per wanted row.
    SET v_max_attempts = GREATEST(COALESCE(checkNum, 0), 0) * 20;

    -- ID range of the matching rows, computed once. Drawing the pivot inside
    -- this range guarantees that a matching row exists at or after it.
    SELECT MIN(t.ID), MAX(t.ID)
    INTO v_min_id, v_max_id
    FROM xdr_authen AS t
    WHERE t.PROCEDURE_START_TIME >= startTime
      AND t.PROCEDURE_START_TIME < endTime
      AND FIND_IN_SET(t.INTERFACE, v_interface_csv) > 0;

    IF v_min_id IS NOT NULL THEN
        -- checkNum is the number of random rows wanted; each iteration tries
        -- to fetch one more.
        WHILE v_picked_count < checkNum AND v_attempt_count < v_max_attempts DO
            SET v_attempt_count = v_attempt_count + 1;
            SET v_pivot_id = v_min_id + FLOOR(RAND() * (v_max_id - v_min_id + 1));
            SET v_candidate_id = NULL;

            SELECT t.ID
            INTO v_candidate_id
            FROM xdr_authen AS t
            WHERE t.ID >= v_pivot_id
              AND t.PROCEDURE_START_TIME >= startTime
              AND t.PROCEDURE_START_TIME < endTime
              AND FIND_IN_SET(t.INTERFACE, v_interface_csv) > 0
            ORDER BY t.ID
            LIMIT 1;

            -- The same row may be drawn twice. INSERT IGNORE skips duplicates
            -- and ROW_COUNT() is then 0, so only new rows advance the counter.
            IF v_candidate_id IS NOT NULL THEN
                INSERT IGNORE INTO xdr_authen_tmp (ID) VALUES (v_candidate_id);
                SET v_picked_count = v_picked_count + ROW_COUNT();
            END IF;
        END WHILE;
    END IF;

    -- Return the picked rows to the caller as a result set.
    SELECT
        t.ID,
        t.LENGTH,
        t.LOCAL_PROVINCE,
        t.LOCAL_CITY,
        t.OWNER_PROVINCE,
        t.OWNER_CITY,
        t.ROAMING_TYPE,
        t.INTERFACE,
        t.XDR_ID,
        t.RAT,
        t.IMSI,
        t.IMEI,
        t.MSISDN,
        t.PROCEDURE_START_TIME,
        t.PROCEDURE_END_TIME,
        t.TRANSACTION_TYPE,
        t.TRANSACTION_STATUS,
        t.SOURCE_NE_IP,
        t.SOURCE_NE_PORT,
        t.DESTINATION_NE_IP,
        t.DESTINATION_NE_PORT,
        t.RESULT_CODE,
        t.EXPERIMENTAL_RESULT_CODE,
        t.ORIGIN_REALM,
        t.DESTINATION_REALM,
        t.ORIGIN_HOST,
        t.DESTINATION_HOST,
        t.INSERT_DATE
    FROM xdr_authen AS t
    INNER JOIN xdr_authen_tmp AS p
        ON p.ID = t.ID;

    -- Leave nothing behind in the session.
    DROP TEMPORARY TABLE IF EXISTS xdr_authen_tmp;
END ;;

DELIMITER ;
以上这篇MySql分组后随机获取每组一条数据的操作就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持每日运维。