PULL_DATA.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  // Namespace object: helper and lifecycle functions are attached to it as properties further down the file.
  const CMD = {}
  // Millisecond constant; the only reference visible in this file is inside a commented-out retry timer.
  const time_count = 1000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const axios = require('axios')
  6. const config = require('../etc/config.json');
  7. const DailyRecordManager = require('./daily_records');
  8. const schedule = require('node-schedule');
  9. const manager = new DailyRecordManager();
  10. const rabbitMq = require('../src/mq/rabbit-mq');
  11. const helper = require('../src/helper');
  12. const PullDataService = require('../src/PullDataService');
  13. const pull_data_task_item = require('./pull_data_task_item');
  // ---- Mutable module-level state shared by the scheduler and the pull pipeline ----

  // Time ranges that drive the scheduler; replaced whenever the pull config is (re)loaded.
  let _24HourRanges = [];
  // { job, timeRange } pairs for the jobs that are currently scheduled.
  let jobs = [];
  // Progress per day: local date key -> object keyed by a time range's `start`.
  let cur_day_data = new Map();
  // Pool of pull-task items (an array, despite the `_map` suffix).
  let pull_data_task_map = [];
  // The time range whose pull is currently in progress, or null before the first run.
  let cur_timeRange = null;
  // Service wrapper used for every remote call made by this module.
  let pullDataService = new PullDataService(redis_help);
  // Queue of batches waiting for their detail lookup.
  let detail_task = [];
  // Queue of pagination parameters for the list pages that are still to be pulled.
  let task_params = [];
  /**
   * Message handler that startConsumer registers with RabbitMQ.
   * Re-reads "PullDataConfig" from Redis and rebuilds the schedule from it.
   *
   * The configuration is validated BEFORE any state is touched: startScheduler
   * cancels all existing jobs first, so installing a missing or malformed
   * config would wipe the running schedule and then fail.
   *
   * @param {*} msg - The received message; its content is not read.
   * @returns {Promise<void>}
   * @throws {SyntaxError} When the stored value is not valid JSON.
   * @throws {TypeError} When the stored value does not decode to an array.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      // JSON.parse(null) yields null, so a missing key ends up here as well.
      throw new TypeError("PullDataConfig must be a JSON array of time ranges; the current schedule was kept");
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Hands out a pull-task item that is not running at the moment.
   * When every pooled item reports that it is running, a new item is created,
   * added to the pool and returned.
   *
   * @returns {object} An idle pooled item, or a freshly created one.
   */
  const get_pull_data_task = function () {
    const idleItem = pull_data_task_map.find((candidate) => !candidate.isRunTask());
    if (idleItem !== undefined) {
      return idleItem;
    }
    const createdItem = new pull_data_task_item(redis_help);
    pull_data_task_map.push(createdItem);
    return createdItem;
  };
  /**
   * Scheduler callback, run when a configured time range starts.
   *
   * Steps:
   *  1. Drops the in-memory progress of every day other than today.
   *  2. Makes sure today has a progress bucket for this time range.
   *  3. Creates or updates today's persisted daily record.
   *  4. Marks the range as the current one, resets its finished counter,
   *     triggers a token refresh and starts the pull for the range.
   *
   * @param {{timeRange: object, executionTime: Date}} context - The range that
   *   fired (its `name`, `start` and `interval_minute` are read) and the moment
   *   the job was invoked.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Only today's bucket is ever read again, so stale days are removed
    // outright. Resetting them to {} kept one key per day alive forever.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }
    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }

    const todayProgress = cur_day_data.get(today);
    if (!todayProgress[timeRange.start]) {
      todayProgress[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", todayProgress, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, todayProgress);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create it.
      const result = await manager.createRecord(today, todayProgress);
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.finish_count = 0;
    // Not awaited on purpose: process_update_token handles its own errors.
    process_update_token();
    await processTask(timeRange);
  };
  /**
   * Starts the consumer: registers messageHandler with rabbitMq.consumerDirectMsg.
   * A failure is logged and not rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const consumerArgs = [messageHandler, "exchange_update_pull_config", "updatePullConig"];
    try {
      await rabbitMq.consumerDirectMsg(...consumerArgs);
    } catch (subscribeError) {
      console.error('启动消费者失败:', subscribeError);
    }
  }
  /**
   * Worker loop for the detail queue.
   *
   * Takes one queued batch (if any), requests the details of its material ids,
   * bumps the finished counter of the current time range, and stores every
   * returned item that has anchor info. Whatever happens, the function re-arms
   * itself to run again after 500 ms.
   *
   * A batch that fails is logged and dropped; it is not put back on the queue.
   *
   * @returns {Promise<void>}
   */
  async function processDetailTask() {
    try {
      if (detail_task.length > 0) {
        console.log("processDetailTask")
        const task_item = detail_task.shift();
        const materialId_list = task_item.materialId_list;
        const materialId_data_list = task_item.materialId_data_list;

        // A page whose items were all filtered out or already stored produces
        // an empty id list; there is nothing to request for it.
        if (materialId_list.length > 0) {
          const response = await pullDataService.get_detail(materialId_list);
          if (!response.success) {
            console.log("get_detail:", response)
            throw { msg: response, timeRange: cur_timeRange, fun: "get_detail" }
          }
          const detailList = response.data.list;

          if (cur_timeRange != null) {
            cur_timeRange.finish_count += detailList.length;
            const today = helper.getLocalDate();
            const todayProgress = cur_day_data.get(today);
            const rangeProgress = todayProgress ? todayProgress[cur_timeRange.start] : undefined;
            if (rangeProgress) {
              rangeProgress.finish_count = cur_timeRange.finish_count;
              await manager.updateRecord(today, todayProgress);
            } else {
              // The bucket is missing, e.g. when the date rolled over while a
              // range was still being drained. Losing a progress update must
              // not discard data that has already been fetched.
              console.error("processDetailTask: no progress bucket for", today, cur_timeRange.start);
            }
          }

          for (const element of detailList) {
            if (!element.hasAnchorInfo) {
              continue;
            }
            // Counters captured from the list page; fall back to an empty
            // object when the detail API returns an id that was not requested.
            const info = materialId_data_list[element.materialId] || {};
            await origin_data_controllers.createOriginData({
              video_id: element.awemeId,
              materialId: element.materialId,
              video_link: element.awemeUrl,
              title: CMD.subTitle(element.title),
              publish_time: new Date(element.publishTime),
              kepp_num: info.kepp_num,
              comment_num: info.comment_num,
              like_num: info.like_num,
              shared_num: info.shared_num,
              is_guajian: 1,
              guajian_link: element.landingUrl,
              status: 0
            });
          }
        }
        console.log(" finish processDetailTask")
      }
    } catch (error) {
      console.error("processDetailTask:", error)
    } finally {
      global.setTimeout(processDetailTask, 500)
    }
  }
  /**
   * Turns one page of the material list into a batch for the detail queue.
   *
   * Validates the response, records the page and total count in today's
   * progress (only when a time range is passed), and collects the ids of the
   * materials that are not stored yet and pass the "FilterConfig" thresholds.
   *
   * @param {object} response - List response; `success`, `data.list` and
   *   `data.count` are read.
   * @param {number} page - Page value recorded as the current page.
   * @param {?object} timeRange - Range whose progress is updated, or null to
   *   skip the progress bookkeeping.
   * @returns {Promise<{materialId_list: Array, materialId_data_list: object, count: *}>}
   *   The selected ids, their counters keyed by material id, and the total
   *   reported by the response.
   * @throws {object} `{msg, timeRange, fun}` when the response failed or has no data.
   * @throws {string} "没有数据了" when the page has no items.
   */
  async function process_material_list(response, page, timeRange) {
    let FilterConfig = await redis_help.getKeyValue("FilterConfig")
    FilterConfig = JSON.parse(FilterConfig)
    const materialId_list = [];
    // Plain object: the entries are keyed by material id, not by array index.
    const materialId_data_list = {};

    if (!response.success) {
      throw { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" }
    }
    if (response.data == undefined || response.data == null) {
      throw { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" }
    }
    if (response.data.list == undefined || response.data.list == null) {
      throw "没有数据了"
    }
    if (response.data.list.length <= 0) {
      throw "没有数据了"
    }

    if (timeRange != null) {
      const today = helper.getLocalDate();
      console.log("response.data:", response.data, today)
      const todayProgress = cur_day_data.get(today);
      const rangeProgress = todayProgress ? todayProgress[timeRange.start] : undefined;
      if (rangeProgress) {
        console.log("response.data.count:", response.data.count)
        console.log("(cur_day_data[today][timeRange.start]:", rangeProgress)
        cur_timeRange.count = response.data.count
        cur_timeRange.cur_page = page
        rangeProgress.count = response.data.count
        rangeProgress.cur_page = page
      } else {
        console.log("cur_day_data空")
      }
      if (todayProgress) {
        // Awaited so that a persistence failure reaches the caller's error
        // handling instead of becoming an unhandled promise rejection.
        await manager.updateRecord(today, todayProgress);
      }
    }

    for (const origin_element of response.data.list) {
      const result = await origin_data_controllers.getOriginData({
        materialId: origin_element.materialId
      });
      if (result.success) {
        // Already stored; nothing to fetch for it.
        continue;
      }
      if (!CMD.isRight(FilterConfig, origin_element)) {
        continue;
      }
      materialId_list.push(origin_element.materialId)
      materialId_data_list[origin_element.materialId] = {
        kepp_num: origin_element.favoriteCount.count,
        comment_num: origin_element.commentCount.count,
        like_num: origin_element.likeCount.count,
        shared_num: origin_element.shareCount.count
      }
    }

    return {
      materialId_list: materialId_list,
      materialId_data_list: materialId_data_list,
      count: response.data.count
    }
  }
  /**
   * Completion callback for a pulled list page (passed to runTask).
   * A successful response is converted into a batch and appended to the detail
   * queue; an unsuccessful one is only logged.
   *
   * @param {object} response - List response; `success` is checked here.
   * @param {number} page - Page value forwarded to process_material_list.
   * @returns {Promise<void>}
   */
  async function finish_material_list(response, page) {
    if (response.success) {
      const queuedBatch = await process_material_list(response, page, null);
      detail_task.push(queuedBatch);
      return;
    }
    console.error("finish_material_list:", response);
  }
  /**
   * Requests a token through pullDataService.getToken and logs it.
   * Any failure is logged and not rethrown, so callers may leave the returned
   * promise unawaited.
   *
   * NOTE(review): the token value itself is written to the console.
   *
   * @returns {Promise<void>}
   */
  async function process_update_token() {
    try {
      const tokenResponse = await pullDataService.getToken();
      console.log("token:", tokenResponse.data.token);
    } catch (tokenError) {
      console.error("process_update_token:", tokenError);
    }
  }
  /**
   * Pulls the next queued list page.
   *
   * Takes one entry from task_params, runs it on an idle pull-task item with
   * finish_material_list as the completion callback, then waits 200 ms. While
   * entries remain, the function re-arms itself to run again after 200 ms, so
   * one call only awaits the first page it handles.
   *
   * @returns {Promise<void>}
   */
  async function process_novel_material_list() {
    try {
      if (task_params.length > 0) {
        const element = task_params.shift();
        const pull_task_item = get_pull_data_task();
        await pull_task_item.runTask(
          finish_material_list,
          cur_timeRange,
          element.page,
          element.limit
        );
        await new Promise((resolve) => setTimeout(resolve, 200));
      }
    } catch (e) {
      // A failed page is skipped, but the failure has to be visible in the logs.
      console.error("process_novel_material_list:", e);
    } finally {
      if (task_params.length > 0) {
        setTimeout(process_novel_material_list, 200)
      }
    }
  }
  /**
   * Starts the pull for one time range.
   *
   * Fetches the first list page (offset 0, limit 500), queues its batch for
   * the detail worker, derives the pagination parameters for the remaining
   * pages from the reported total, and kicks off the page loop. Errors are
   * logged and not rethrown.
   *
   * "processTask over!" is logged once the page loop's first call returns;
   * later pages may still be scheduled at that point.
   *
   * @param {object} timeRange - Range whose progress is updated for the first
   *   page. The remote list call itself uses the module-level cur_timeRange.
   * @returns {Promise<void>}
   */
  async function processTask(timeRange) {
    const PAGE_LIMIT = 500;
    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, 0, PAGE_LIMIT);
      const detail_item = await process_material_list(response, 0, timeRange);
      task_params = helper.getPaginationParams(detail_item.count, PAGE_LIMIT);
      // The first entry describes the page that was just fetched above.
      task_params.shift();
      detail_task.push(detail_item);
      await process_novel_material_list();
      console.log("processTask over!")
    } catch (e) {
      console.error("PULL_DATA: error:", e)
    }
  }
  /**
   * Shortens a title so that it is at most `maxLength` UTF-16 code units long.
   *
   * @param {?string} title - Title to shorten. `null`/`undefined` yield an
   *   empty string; other non-string values are converted with String().
   * @param {number} [maxLength=299] - Maximum length of the returned string.
   * @returns {string} The title, cut after `maxLength` code units when longer.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (title === null || title === undefined) {
      // Items may arrive without a title; this must not abort a whole batch.
      return '';
    }
    const text = String(title);
    return text.length > maxLength ? text.substring(0, maxLength) : text;
  };
  /**
   * Checks whether a list item reaches every configured minimum.
   *
   * Compared pairs (item counter vs. config threshold):
   * likeCount/like_num, favoriteCount/kepp_num, shareCount/shared_num,
   * commentCount/comment_num.
   *
   * A threshold that is absent imposes no restriction, including when the
   * whole config is missing. A counter that is absent counts as 0.
   *
   * @param {?object} FilterConfig - Thresholds, any subset of the keys above.
   * @param {?object} origin_element - List item whose `*.count` values are read.
   * @returns {boolean} false as soon as one counter is below its threshold.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    const rules = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    const thresholds = FilterConfig || {};
    const element = origin_element || {};

    return rules.every(([counterKey, thresholdKey]) => {
      const threshold = thresholds[thresholdKey];
      if (threshold === undefined || threshold === null) {
        return true;
      }
      const counter = element[counterKey];
      const hasCount = counter !== undefined && counter !== null
        && counter.count !== undefined && counter.count !== null;
      const count = hasCount ? counter.count : 0;
      return !(count < threshold);
    });
  };
  /**
   * Boots the module. Hands redis_help.connect a callback that:
   *  - starts the RabbitMQ consumer (not awaited; it logs its own failures),
   *  - loads "PullDataConfig" from Redis as the list of scheduled time ranges,
   *  - requests a token,
   *  - restores today's progress from the stored daily record,
   *  - starts the scheduler and the detail worker loop.
   *
   * The callback is presumably invoked once the Redis connection is ready —
   * confirm against redis_help.connect. Errors inside the callback are not
   * caught here; they propagate to whatever invokes it.
   *
   * @returns {Promise<void>} Resolves as soon as connect() has been called.
   */
  CMD.init = async function () {
    redis_help.connect(async () => {
      startConsumer();

      const rawConfig = await redis_help.getKeyValue("PullDataConfig");
      const PullDataConfig = JSON.parse(rawConfig);

      const response = await pullDataService.getToken();
      console.log("token:", response);

      if (Array.isArray(PullDataConfig)) {
        _24HourRanges = PullDataConfig;
      } else {
        // JSON.parse(null) is null: a missing key must not replace the range
        // list with a value the scheduler cannot iterate.
        console.error("CMD.init: PullDataConfig is missing or not an array, keeping current ranges:", PullDataConfig);
      }

      const key = helper.getLocalDate();
      const record_res = await manager.getRecord(key);
      const hasStoredProgress = record_res != null
        && record_res.content != null
        && typeof record_res.content === 'object';
      // Later code indexes the day bucket directly, so only an object may be
      // installed as today's progress.
      if (hasStoredProgress && !cur_day_data.has(key)) {
        cur_day_data.set(key, record_res.content);
      }

      // NOTE(review): second token request during start-up; looks redundant
      // with the one above — confirm before removing.
      await pullDataService.getToken();

      // Start the scheduler, then the detail worker loop.
      CMD.startScheduler(taskCallback);
      processDetailTask();
    });
  };
  /**
   * Builds the default table of 24 hourly time ranges.
   *
   * Every entry starts on the full hour ("HH:00"). `timestamp` and
   * `timeFormat` keep the fixed reference day of the original hand-written
   * table (12/10/2024; 1733760000000 is that table's 00:00 entry) rather than
   * being derived from the current date.
   *
   * The hand-written table listed hour 20 with `start: '20:45'` while its
   * `hour`, `timestamp` and `timeFormat` all described 8:00 PM. Generating the
   * table removes that inconsistency.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function () {
    const HOUR_MS = 60 * 60 * 1000;
    const BASE_TIMESTAMP = 1733760000000;

    // Period label for an hour of the day.
    const periodNameOf = (hour) => {
      if (hour < 6) return '凌晨区间';
      if (hour < 9) return '早晨区间';
      if (hour < 12) return '上午区间';
      if (hour < 14) return '中午区间';
      if (hour < 18) return '下午区间';
      if (hour < 22) return '晚上区间';
      return '深夜区间';
    };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const hour12 = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: periodNameOf(hour),
        start: `${String(hour).padStart(2, '0')}:00`,
        timestamp: BASE_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${hour12}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a job object are skipped: scheduleJob returns null for a
   * spec it cannot schedule, and one such entry must not prevent the remaining
   * jobs from being cancelled or the list from being reset.
   */
  CMD.stopScheduler = function () {
    jobs.forEach((entry) => {
      if (entry && entry.job) {
        entry.job.cancel();
      }
    });
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)builds the schedule: cancels all current jobs, then creates one daily
   * job per entry of _24HourRanges, firing at the entry's `start` ("H:M").
   *
   * Entries whose `start` does not yield a valid hour (0-23) and minute (0-59)
   * are reported and skipped, as are entries the scheduler refuses, so that one
   * bad entry cannot break the rest of the schedule or a later stop.
   *
   * @param {function({timeRange: object, executionTime: Date}): *} taskCallback
   *   Invoked (and awaited) each time a job fires; errors it throws are logged.
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const timeRanges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    timeRanges.forEach((timeRange) => {
      const startText = timeRange && typeof timeRange.start === 'string' ? timeRange.start : '';
      const [hourText, minuteText] = startText.split(':');
      const hour = Number.parseInt(hourText, 10);
      const minute = Number.parseInt(minuteText, 10);
      // NaN fails every comparison, so unparsable values are rejected here too.
      const isValidTime = hour >= 0 && hour <= 23 && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('startScheduler: skipping time range with invalid start:', timeRange);
        return;
      }

      // Daily job at the given minute and hour.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('startScheduler: scheduler rejected time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Start the module as soon as the file is loaded; the promise returned by init() is not awaited.
  CMD.init()