PULL_DATA_NEW.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. const CMD = {}
  2. const redis_help = require('../src/use_redis');
  3. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  4. const config = require('../etc/config.json');
  5. const DailyRecordManager = require('./daily_records');
  6. const schedule = require('node-schedule');
  7. const manager = new DailyRecordManager();
  8. const rabbitMq = require('../src/mq/rabbit-mq');
  9. const helper = require('../src/helper');
  10. const PullDataServiceNew = require('../src/PullDataServiceNew');
  // Module-level state shared by the scheduler and the pull loop.

  // Time ranges to schedule; replaced with the parsed "PullDataConfig" value read from Redis.
  let _24HourRanges = [];
  // Entries of the form { job, timeRange } for every scheduled job, kept so they can be cancelled.
  let jobs = [];
  // Per-day statistics keyed by the value of helper.getLocalDate(); each value maps a
  // range's `start` string to that range's counters.
  const cur_day_data = new Map();
  // Time range currently being pulled; null until the first scheduled task fires.
  let cur_timeRange = null;
  // Service used to fetch material lists.
  const pullDataService = new PullDataServiceNew(redis_help);
  // Cursor handed to the pull service on the next request; -1 marks a finished pull.
  let cur_page = 0;
  /**
   * Message-queue handler run when a pull-config update notification arrives.
   *
   * Re-reads "PullDataConfig" from Redis and restarts the scheduler with it.
   * The new value is validated before any state is touched, so a missing or
   * malformed config leaves the currently scheduled jobs in place.
   *
   * @param {*} msg - Incoming message. Only its arrival matters; the content is not read.
   * @returns {Promise<void>} Rejects when the config cannot be read, parsed, or is not an array.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const ranges = JSON.parse(rawConfig);
    if (!Array.isArray(ranges)) {
      // JSON.parse(null) returns null; scheduling from that would cancel every job and then crash.
      throw new Error(`PullDataConfig must be a JSON array, got: ${rawConfig}`);
    }
    _24HourRanges = ranges;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Scheduler callback: starts the pull for one time range.
   *
   * Steps:
   *  1. Drops statistics of every day other than today from cur_day_data.
   *  2. Makes sure today's entry has a slot for this range (keyed by `timeRange.start`).
   *  3. Persists today's statistics through the record manager (update if a
   *     record exists, create otherwise).
   *  4. Makes this range the current one, resets the cursor and runs processTask.
   *
   * Note: the passed `timeRange` object is mutated (`today`, `finish_count`)
   * because the rest of the module reads those fields from cur_timeRange.
   *
   * @param {{timeRange: object, executionTime: Date}} context - Range to pull and the time the job fired.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Remove earlier days instead of resetting them to {}, otherwise the map
    // gains one stale key per day for as long as the process runs.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }

    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }
    const dayStats = cur_day_data.get(today);
    if (!dayStats[timeRange.start]) {
      dayStats[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", dayStats, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, cur_day_data.get(today));
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, cur_day_data.get(today));
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.today = today;
    cur_timeRange.finish_count = 0;
    cur_page = 0;
    await processTask();
  };
  /**
   * Starts the consumer that listens for pull-config update notifications and
   * hands each message to messageHandler. A start-up failure is logged and not
   * rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchange = "exchange_update_pull_config";
    const routingKey = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchange, routingKey);
    } catch (err) {
      console.error('启动消费者失败:', err);
    }
  }
  /**
   * Finds the first entry of `list` whose `materialId` matches.
   *
   * The comparison is deliberately loose (`==`), as in the original code, so a
   * numeric id still matches its string form.
   *
   * @param {Array<object>} list - Entries carrying a `materialId` property.
   * @param {string|number} materialId - Id to look for.
   * @returns {object|null} The matching entry, or null when there is none or `list` is not an array.
   */
  function get_detail_by_materialId(list, materialId) {
    if (!Array.isArray(list)) {
      return null;
    }
    // Null/undefined entries are skipped instead of crashing the lookup.
    const match = list.find((item) => item != null && item.materialId == materialId);
    return match === undefined ? null : match;
  }
  /**
   * Joins material statistics with their detail records and publishes the
   * result as an "on_recv_pull_data" message.
   *
   * For every entry of `material_list` that has a matching detail record in
   * `list` (matched by materialId), one payload object is built from the
   * detail fields plus the four counters of the statistics entry. Entries
   * without a detail record are skipped. A counter that is missing is
   * reported as 0 instead of aborting the whole batch.
   *
   * @param {Array<object>} list - Detail records (awemeId, materialId, title, ...).
   * @param {Array<object>} material_list - Statistics entries with `materialId` and
   *   `favoriteCount` / `commentCount` / `likeCount` / `shareCount` objects holding a `count`.
   * @returns {Array<object>} The payload objects that were published.
   */
  function generate_pull_time_data(list, material_list) {
    const countOf = (metric) => (metric != null && metric.count != null ? metric.count : 0);

    const pull_data = [];
    for (const material of material_list || []) {
      if (material == null) {
        continue;
      }
      const info = get_detail_by_materialId(list, material.materialId);
      if (info == null) {
        continue;
      }
      pull_data.push({
        awemeId: info.awemeId,
        materialId: info.materialId,
        awemeUrl: info.awemeUrl,
        title: info.title,
        publishTime: info.publishTime,
        hasAnchorInfo: info.hasAnchorInfo,
        landingUrl: info.landingUrl,
        createTime: info.createTime,
        updateTime: info.updateTime,
        favoriteCount: countOf(material.favoriteCount),
        commentCount: countOf(material.commentCount),
        likeCount: countOf(material.likeCount),
        shareCount: countOf(material.shareCount)
      });
    }

    // Before the first scheduled task there is no current range; fall back to
    // the same date source the scheduler callback uses for `today`.
    const today = cur_timeRange != null ? cur_timeRange.today : helper.getLocalDate();
    // Not awaited on purpose: sendPullDataMQMessage logs its own failures.
    sendPullDataMQMessage(JSON.stringify({ cmd: "on_recv_pull_data", data: pull_data, today: today }));
    return pull_data;
  }
  /**
   * Validates one material-list response, records progress for the current
   * time range and works out which material ids are not stored yet.
   *
   * @param {object} response - Service response; expected to carry `success` and `data.list`
   *   (entries with `materialId` and the four `*Count.count` counters) plus `data.count`.
   * @param {*} page - Page/cursor value stored in the progress statistics.
   * @returns {Promise<object|null>} null when the response is unusable or any step fails
   *   (the reason is logged); otherwise an object with:
   *   - list: the response list
   *   - id_list: every materialId of the list
   *   - materialId_list: the ids reported as non-existent by origin_data_controllers
   *   - materialId_data_list: plain object mapping materialId to its counters
   *   - count: number of entries in the list
   */
  async function process_material_list(response, page) {
    // Every rejection goes through the same log line the original used.
    const fail = (reason) => {
      console.log("materialId_list:", reason);
      return null;
    };

    try {
      if (!response.success || response.data == null) {
        return fail({ msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
      }
      const list = response.data.list;
      if (list == null || list.length <= 0) {
        return fail("没有数据了");
      }

      if (cur_timeRange != null) { // progress statistics
        const today = cur_timeRange.today;
        const dayStats = cur_day_data.get(today);
        const rangeStats = dayStats != null ? dayStats[cur_timeRange.start] : undefined;
        if (rangeStats) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", rangeStats);
          // `count` is not initialised anywhere visible; start from 0 rather than producing NaN.
          cur_timeRange.count = (cur_timeRange.count || 0) + list.length;
          cur_timeRange.cur_page = page;
          rangeStats.start_time = cur_timeRange.start_time;
          rangeStats.pull_day = cur_timeRange.pull_day;
          rangeStats.pull_time = cur_timeRange.pull_time || {};
          rangeStats.count = response.data.count;
          rangeStats.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        if (dayStats != null) {
          // Fire-and-forget, but with a handler so a failed save cannot become an unhandled rejection.
          Promise.resolve(manager.updateRecord(today, dayStats)).catch((err) => {
            console.error("updateRecord failed:", err);
          });
        }
      }

      const materialId_list = [];
      // Plain object, not an array: numeric ids used as array indexes would
      // create a huge sparse array.
      const materialId_data_list = {};
      for (const item of list) {
        materialId_list.push(item.materialId);
        materialId_data_list[item.materialId] = {
          kepp_num: item.favoriteCount.count,
          comment_num: item.commentCount.count,
          like_num: item.likeCount.count,
          shared_num: item.shareCount.count
        };
      }

      const result = await origin_data_controllers.findNonExistentMaterialIds(materialId_list);
      return {
        list: list,
        id_list: materialId_list,
        materialId_list: result.data.nonExistentIds,
        materialId_data_list: materialId_data_list,
        count: list.length
      };
    } catch (e) {
      return fail(e);
    }
  }
  // Handle of the timer that will run the next processTask step, if one is pending.
  let processTaskTimer = null;
  // Sequence number of the most recent processTask call; lets an older, still
  // awaiting call notice that it has been superseded.
  let processTaskRunId = 0;

  /**
   * Runs one step of the pull loop for cur_timeRange and, unless the pull is
   * finished, schedules the next step one second later.
   *
   * One step:
   *  - asks pullDataService for up to 500 entries starting at the cursor `cur_page`;
   *  - stops (cur_page = -1) when the reply is not "success", carries no
   *    entries, the last entry has no usable `updateTime`, or that time has
   *    reached `cur_timeRange.pull_day`;
   *  - otherwise publishes the entries and moves the cursor to the last
   *    entry's `updateTime` plus one millisecond.
   *
   * If the service call throws, the cursor is left unchanged and the same
   * request is retried on the next step.
   *
   * Each call cancels a pending timer and invalidates any call that is still
   * awaiting the service, so starting a new task never leaves two loops
   * running on the shared cursor.
   *
   * NOTE(review): as in the original, the page whose last entry reaches
   * `pull_day` is dropped as a whole, including earlier entries on that page —
   * confirm this is intended.
   *
   * @returns {Promise<void>} Resolves after this step; never rejects.
   */
  async function processTask() {
    if (processTaskTimer !== null) {
      global.clearTimeout(processTaskTimer);
      processTaskTimer = null;
    }
    const runId = ++processTaskRunId;

    // Ends the pull; the log line matches what the original printed on these paths.
    const finish = (reason) => {
      cur_page = -1;
      console.error("PULL_DATA: error:", reason);
    };

    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, cur_page, 500);
      if (runId !== processTaskRunId) {
        // A newer call took over while this one was waiting; it owns the state now.
        return;
      }

      const list = response != null && response.data != null ? response.data.list : null;
      if (response == null || response.msg !== "success" || !Array.isArray(list) || list.length <= 0) {
        finish(response);
        return;
      }

      const last_data = list[list.length - 1];
      const lastUpdated = new Date(last_data != null ? last_data["updateTime"] : NaN).getTime();
      const endTime = new Date(cur_timeRange.pull_day).getTime();
      // A NaN cursor would never equal -1 and would keep the loop alive forever.
      if (Number.isNaN(lastUpdated) || lastUpdated >= endTime) {
        finish(response);
        return;
      }

      // Not awaited on purpose: sendPullDataMQMessage logs its own failures.
      sendPullDataMQMessage(JSON.stringify({ cmd: "on_recv_pull_data", data: list, today: cur_timeRange.today }));
      console.log("processTask over!");
      cur_page = lastUpdated + 1;
    } catch (e) {
      console.error("PULL_DATA: error:", e);
      if (runId !== processTaskRunId) {
        return;
      }
      // Cursor intentionally untouched: retry the same request.
    }

    if (cur_page !== -1) {
      processTaskTimer = global.setTimeout(processTask, 1000);
    }
  }
  /**
   * Shortens a title to at most `maxLength` UTF-16 code units.
   *
   * The cut never lands between the two halves of a surrogate pair, so the
   * result cannot end in a broken character. Values that are not strings are
   * returned unchanged instead of throwing.
   *
   * @param {string} title - Title to shorten.
   * @param {number} [maxLength=299] - Maximum length of the result.
   * @returns {string} The title, truncated when it was longer than `maxLength`.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (typeof title !== 'string' || title.length <= maxLength) {
      return title;
    }
    let end = maxLength;
    const lastUnit = title.charCodeAt(end - 1);
    // 0xD800-0xDBFF is the high (leading) surrogate range.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
    return title.substring(0, end);
  };
  /**
   * Tells whether an entry reaches every minimum configured in FilterConfig.
   *
   * Counter / threshold pairs checked:
   *   likeCount >= like_num, favoriteCount >= kepp_num,
   *   shareCount >= shared_num, commentCount >= comment_num.
   *
   * A counter that is absent (or has no `count`) is treated as 0, so such an
   * entry fails any positive threshold instead of throwing or slipping
   * through. A threshold that is not configured never rejects.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Entry with `*Count` objects holding a `count`.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    const checks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    return checks.every(([counterKey, thresholdKey]) => {
      const counter = origin_element[counterKey];
      const value = counter != null && counter.count != null ? counter.count : 0;
      // Written as a negated "<" so an undefined threshold keeps passing, as before.
      return !(value < FilterConfig[thresholdKey]);
    });
  };
  /**
   * Boots the module once Redis is connected:
   *  - starts the config-update consumer,
   *  - loads "PullDataConfig" from Redis into _24HourRanges,
   *  - restores today's statistics from the record manager when a record with
   *    content exists and nothing is cached for today yet,
   *  - starts the scheduler with taskCallback.
   *
   * A missing or malformed config, or any error during boot, is logged and
   * leaves the scheduler unstarted; nothing is thrown out of the connect
   * callback.
   *
   * @returns {Promise<void>} Resolves as soon as the connect callback is registered.
   */
  CMD.init = async function () {
    redis_help.connect(async () => {
      try {
        // Not awaited: startConsumer logs its own start-up failure.
        startConsumer();

        const rawConfig = await redis_help.getKeyValue("PullDataConfig");
        const ranges = JSON.parse(rawConfig);
        if (!Array.isArray(ranges)) {
          console.error("PULL_DATA init: PullDataConfig is not a JSON array:", rawConfig);
          return;
        }
        _24HourRanges = ranges;

        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        // A record without content must not be cached: later code indexes into the cached value.
        if (record_res != null && record_res.content != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error("PULL_DATA init failed:", error);
      }
    });
  };
  /**
   * Builds the 24 hourly time ranges of a day.
   *
   * Every call returns a fresh array of 24 objects, one per hour, each with:
   *   name       - period label of that hour
   *   start      - "HH:MM" start time
   *   timestamp  - fixed snapshot value: 1733760000000 plus one hour per step
   *   hour       - 0..23
   *   timeFormat - fixed snapshot text, "12/10/2024, h:00:00 AM|PM"
   *
   * The output is exactly what the previous hand-written table contained; the
   * timestamp and timeFormat values are a frozen snapshot and do not follow
   * the current date.
   *
   * NOTE(review): hour 20 starts at '20:45' while its timestamp and timeFormat
   * say 8:00 PM. This is kept as it was — confirm whether '20:00' was meant.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function () {
    const HOUR_MS = 60 * 60 * 1000;
    const BASE_TIMESTAMP = 1733760000000;
    // Period label for every hour below `before`; checked in order.
    const periods = [
      { before: 6, name: '凌晨区间' },
      { before: 9, name: '早晨区间' },
      { before: 12, name: '上午区间' },
      { before: 14, name: '中午区间' },
      { before: 18, name: '下午区间' },
      { before: 22, name: '晚上区间' },
      { before: 24, name: '深夜区间' }
    ];
    // Start times that differ from the top of the hour.
    const startOverrides = { 20: '20:45' };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const period = periods.find((candidate) => hour < candidate.before);
      const clockHour = hour % 12 || 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: period.name,
        start: startOverrides[hour] || `${String(hour).padStart(2, '0')}:00`,
        timestamp: BASE_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a job are skipped, and a job whose cancel() throws is
   * logged and does not stop the remaining jobs from being cancelled or the
   * list from being cleared.
   */
  CMD.stopScheduler = function () {
    for (const entry of jobs) {
      const job = entry != null ? entry.job : null;
      if (job == null) {
        continue;
      }
      try {
        job.cancel();
      } catch (error) {
        console.error('cancel job failed:', error);
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)starts the scheduler: cancels all existing jobs, then creates one
   * daily job per entry of _24HourRanges, firing at the entry's "HH:MM"
   * `start` time.
   *
   * Entries whose `start` is not a valid "HH:MM" time, and entries the
   * scheduling library does not return a job for, are logged and skipped so
   * the job list only ever holds real jobs.
   *
   * @param {function({timeRange: object, executionTime: Date}): (Promise<void>|void)} taskCallback
   *   Called each time a job fires; an error it throws is logged, not propagated.
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    // Create one job per time range.
    ranges.forEach((timeRange) => {
      const startText = timeRange != null ? timeRange.start : undefined;
      const parts = typeof startText === 'string' ? startText.split(':') : [];
      const hour = Number.parseInt(parts[0], 10);
      const minute = Number.parseInt(parts[1], 10);
      const isValidTime =
        Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('invalid time range start, skipped:', timeRange);
        return;
      }

      // Cron fields: minute hour day-of-month month day-of-week => every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (job == null) {
        console.error('job could not be scheduled, skipped:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  /**
   * Publishes a message on the "exchange_pull_data_system_new" exchange.
   * The routing key is passed along only when it is neither null nor
   * undefined. A publish failure is logged and not rethrown.
   *
   * @param {string} message - Message body to publish.
   * @param {string|null} [routingKey="on_recv_pull_data"] - Routing key; null publishes without one.
   * @returns {Promise<void>}
   */
  async function sendPullDataMQMessage(message, routingKey = "on_recv_pull_data") {
    const publishArgs = [message, "exchange_pull_data_system_new"];
    if (routingKey != null) {
      publishArgs.push(routingKey);
    }
    try {
      await rabbitMq.producerDirectMsg(...publishArgs);
      console.log('消息发送成功');
    } catch (err) {
      console.error('发送消息失败:', err);
    }
  }
  // Debug hook: the automatic CMD.init() call is disabled, so this branch currently does nothing.
  if (config.isDebug) {
    // CMD.init()
  }