// PULL_DATA.js
  // Namespace object; the helper and scheduler functions below are attached to it.
  const CMD = {};
  // NOTE(review): not referenced anywhere else in this file.
  const time_count = 1000;
  const redis_help = require('../src/use_redis');
  const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  const axios = require('axios');
  const config = require('../etc/config.json');
  const DailyRecordManager = require('./daily_records');
  const schedule = require('node-schedule');
  // Used below to read, create and update the per-day progress record.
  const manager = new DailyRecordManager();
  const rabbitMq = require('../src/mq/rabbit-mq');
  const helper = require('../src/helper');
  const PullDataService = require('../src/PullDataService');
  const pull_data_task_item = require('./pull_data_task_item');

  // Time ranges that drive the scheduler; replaced whenever "PullDataConfig" is (re)loaded.
  let _24HourRanges = [];
  // { job, timeRange } entries for the jobs that are currently scheduled.
  let jobs = [];
  // Progress per day: local date key -> object keyed by timeRange.start.
  const cur_day_data = new Map();
  // Pool of pull_data_task_item workers; idle ones are reused.
  const pull_data_task_map = [];
  // Time range of the most recently started scheduled run (null until the first run).
  let cur_timeRange = null;
  const pullDataService = new PullDataService(redis_help);
  // FIFO queue of detail batches consumed by processDetailTask.
  const detail_task = [];
  /**
   * Message handler for "pull config updated" notifications.
   *
   * Re-reads the "PullDataConfig" key from Redis and restarts the scheduler with it.
   * The message itself is not inspected; it only acts as a trigger.
   *
   * A missing or non-array config is rejected and the current schedule is kept:
   * CMD.startScheduler cancels every job before it iterates the ranges, so
   * installing such a value would leave the process without any scheduled job.
   * Malformed JSON still makes JSON.parse throw to the caller, as before.
   *
   * @param {*} msg - Incoming message (unused).
   * @returns {Promise<void>}
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      console.error("PullDataConfig is missing or not an array, keeping the current schedule:", rawConfig);
      return;
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Hands out a pull-data worker that is not running a task.
   *
   * The first pooled worker whose isRunTask() is falsy is reused. When every
   * pooled worker is busy, a new pull_data_task_item is created, added to the
   * pool and returned.
   *
   * @returns {object} A pull_data_task_item instance.
   */
  const get_pull_data_task = function () {
    const idleWorker = pull_data_task_map.find((worker) => !worker.isRunTask());
    if (idleWorker !== undefined) {
      return idleWorker;
    }
    const freshWorker = new pull_data_task_item(redis_help);
    pull_data_task_map.push(freshWorker);
    return freshWorker;
  };
  /**
   * Scheduler callback: starts one scheduled pull for a time range.
   *
   * Steps:
   *  1. Drops progress buckets of other days and makes sure today's bucket has
   *     an entry for `timeRange.start`.
   *  2. Persists today's bucket through `manager` (update when a record for
   *     today exists, create otherwise).
   *  3. Marks `timeRange` as the current range, resets its `finish_count`
   *     and runs processTask for it.
   *
   * @param {{timeRange: object, executionTime: Date}} context - Supplied by CMD.startScheduler.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Previously stale days were reset to {} but never removed, so the map
    // gained one key per day for the lifetime of the process.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }
    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }
    const todayBucket = cur_day_data.get(today);
    if (!todayBucket[timeRange.start]) {
      todayBucket[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", todayBucket, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, todayBucket);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, todayBucket);
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.finish_count = 0;
    await processTask(timeRange);
  };
  /**
   * Starts the message consumer: registers messageHandler through
   * rabbitMq.consumerDirectMsg. A failure to start is logged and not rethrown.
   *
   * NOTE(review): "updatePullConig" looks like a misspelling of "Config"; it is a
   * runtime identifier and is kept as-is because it must match the publisher.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    try {
      await rabbitMq.consumerDirectMsg(
        messageHandler,
        "exchange_update_pull_config",
        "updatePullConig"
      );
    } catch (startupError) {
      console.error('启动消费者失败:', startupError);
    }
  }
  /**
   * Worker loop for the detail queue.
   *
   * Takes one batch from `detail_task` (if any), fetches its details through
   * pullDataService.get_detail, updates the progress counters of the current
   * time range, and stores every returned item that has `hasAnchorInfo` via
   * origin_data_controllers.createOriginData.
   *
   * Errors are logged, never rethrown. The function re-arms itself with a
   * 500 ms timer in `finally`, whether or not a batch was processed.
   *
   * Progress bookkeeping is skipped when today's bucket or the entry for the
   * current range is missing (for example when a batch is processed after the
   * local date has changed). Previously that raised a TypeError and the
   * already fetched batch was dropped before anything was stored.
   *
   * @returns {Promise<void>}
   */
  async function processDetailTask() {
    try {
      if (detail_task.length > 0) {
        console.log("processDetailTask");
        const task_item = detail_task.shift();
        const materialId_list = task_item.materialId_list;
        const materialId_data_list = task_item.materialId_data_list;

        const response = await pullDataService.get_detail(materialId_list);
        if (!response.success) {
          console.log("get_detail:", response);
          // Caught by the catch below; the object form is kept for the log output.
          throw { msg: response, timeRange: cur_timeRange, fun: "get_detail" };
        }
        const detailList = response.data.list;

        if (cur_timeRange != null) {
          cur_timeRange.finish_count += detailList.length;
          const today = helper.getLocalDate();
          const todayBucket = cur_day_data.get(today);
          const progress = todayBucket ? todayBucket[cur_timeRange.start] : undefined;
          if (progress) {
            progress.finish_count = cur_timeRange.finish_count;
            await manager.updateRecord(today, todayBucket);
          } else {
            console.log("cur_day_data空");
          }
        }

        for (const element of detailList) {
          if (!element.hasAnchorInfo) {
            continue;
          }
          const info = materialId_data_list[element.materialId];
          if (info == null) {
            // The id was not part of this batch's counter map; skip it instead of
            // throwing and losing the remaining items of the batch.
            console.warn("processDetailTask: no counters for materialId", element.materialId);
            continue;
          }
          await origin_data_controllers.createOriginData({
            video_id: element.awemeId,
            materialId: element.materialId,
            video_link: element.awemeUrl,
            title: CMD.subTitle(element.title),
            publish_time: new Date(element.publishTime),
            kepp_num: info.kepp_num,
            comment_num: info.comment_num,
            like_num: info.like_num,
            shared_num: info.shared_num,
            is_guajian: 1,
            guajian_link: element.landingUrl,
            status: 0
          });
        }
        console.log(" finish processDetailTask");
      }
    } catch (error) {
      console.error("processDetailTask:", error);
    } finally {
      global.setTimeout(processDetailTask, 500);
    }
  }
  /**
   * Turns one page of the material list into a detail batch.
   *
   * For every item of `response.data.list` that is not stored yet
   * (origin_data_controllers.getOriginData reports no success) and that passes
   * CMD.isRight against the "FilterConfig" thresholds from Redis, the id is
   * collected together with its engagement counters.
   *
   * When `timeRange` is given, the total count and the current page are also
   * written to `timeRange` and to today's progress entry, and today's bucket
   * is persisted through `manager`.
   *
   * Changes compared with the previous version:
   *  - the record update is awaited and its failure is logged (it used to be a
   *    floating promise, i.e. a possible unhandled rejection);
   *  - a missing bucket for today no longer raises a TypeError;
   *  - paging progress is written to the `timeRange` argument rather than to the
   *    global current range, which may already belong to a newer run;
   *  - the id -> counters map is a plain object instead of an Array used as a
   *    dictionary (lookups by id work the same way).
   *
   * @param {object} response - Result of pullDataService.get_novel_material_list.
   * @param {number} page - Page this response belongs to.
   * @param {?object} timeRange - Range to book progress on, or null to skip bookkeeping.
   * @returns {Promise<{materialId_list: Array, materialId_data_list: Object, count: *}>}
   * @throws {{msg: object, timeRange: ?object, fun: string}} When the response is unsuccessful or has no data.
   * @throws {string} "没有数据了" when the page has no items.
   */
  async function process_material_list(response, page, timeRange) {
    const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));
    const materialId_list = [];
    const materialId_data_list = {};

    if (!response.success) {
      throw { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" };
    }
    if (response.data == null) {
      throw { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" };
    }
    if (response.data.list == null || response.data.list.length <= 0) {
      throw "没有数据了";
    }

    if (timeRange != null) {
      const today = helper.getLocalDate();
      console.log("response.data:", response.data, today);
      const todayBucket = cur_day_data.get(today);
      const progress = todayBucket ? todayBucket[timeRange.start] : undefined;
      if (progress) {
        console.log("response.data.count:", response.data.count);
        console.log("(cur_day_data[today][timeRange.start]:", progress);
        timeRange.count = response.data.count;
        timeRange.cur_page = page;
        progress.count = response.data.count;
        progress.cur_page = page;
      } else {
        console.log("cur_day_data空");
      }
      if (todayBucket) {
        try {
          await manager.updateRecord(today, todayBucket);
        } catch (recordError) {
          // Bookkeeping only: a failed write must not abort the pull.
          console.error("process_material_list updateRecord:", recordError);
        }
      }
    }

    for (const origin_element of response.data.list) {
      const result = await origin_data_controllers.getOriginData({
        materialId: origin_element.materialId
      });
      if (result.success) {
        // Already stored; nothing to fetch for this item.
        continue;
      }
      if (!CMD.isRight(FilterConfig, origin_element)) {
        continue;
      }
      materialId_list.push(origin_element.materialId);
      materialId_data_list[origin_element.materialId] = {
        kepp_num: origin_element.favoriteCount.count,
        comment_num: origin_element.commentCount.count,
        like_num: origin_element.likeCount.count,
        shared_num: origin_element.shareCount.count
      };
    }

    return {
      materialId_list: materialId_list,
      materialId_data_list: materialId_data_list,
      count: response.data.count
    };
  }
  /**
   * Completion callback for an additional material-list page.
   *
   * An unsuccessful response is only logged. A successful one is converted by
   * process_material_list (without time-range bookkeeping) and the resulting
   * batch is appended to the detail queue.
   *
   * @param {object} response - Page response handed over by the pull worker.
   * @param {number} page - Page the response belongs to.
   * @returns {Promise<void>}
   */
  async function finish_material_list(response, page) {
    if (!response.success) {
      console.error("finish_material_list:", response);
      return;
    }
    const detailBatch = await process_material_list(response, page, null);
    detail_task.push(detailBatch);
  }
  /**
   * Token keep-alive loop: calls pullDataService.getToken() and re-arms itself
   * with a 200 ms timer in `finally`, regardless of the outcome.
   *
   * The token value is intentionally no longer written to the log. The previous
   * version printed it on every poll, which put a credential into stdout
   * several times per second. Only the absence of a token is reported now.
   *
   * @returns {Promise<void>}
   */
  async function process_update_token() {
    try {
      const response = await pullDataService.getToken();
      const hasToken = Boolean(response && response.data && response.data.token);
      if (!hasToken) {
        console.error("process_update_token:", "getToken returned no token");
      }
    } catch (e) {
      console.error("process_update_token:", e);
    } finally {
      setTimeout(process_update_token, 200);
    }
  }
  /**
   * Pulls the material list for one time range.
   *
   * Fetches the first page, converts it into a detail batch via
   * process_material_list and queues it. The remaining pages reported by
   * helper.getPaginationParams (entries from index 1 on) are then requested one
   * after another through pooled pull workers, with a short pause before each.
   * Errors are logged and not rethrown.
   *
   * Changes compared with the previous version:
   *  - every request uses the range this call was started for; the global
   *    current range can be replaced by a newer scheduled run while this
   *    function is awaiting, which would mix pages of two ranges;
   *  - the unused read/parse of "FilterConfig" was removed
   *    (process_material_list loads it itself);
   *  - dead commented-out code and the empty `finally` were removed.
   *
   * @param {object} [timeRange] - Range to pull; falls back to the current range when omitted.
   * @returns {Promise<void>}
   */
  async function processTask(timeRange) {
    const PAGE_SIZE = 500;
    const PAGE_DELAY_MS = 200;
    const range = timeRange != null ? timeRange : cur_timeRange;
    try {
      const firstPage = await pullDataService.get_novel_material_list(range, 0, PAGE_SIZE);
      const detail_item = await process_material_list(firstPage, 0, timeRange);
      const pages = helper.getPaginationParams(detail_item.count, PAGE_SIZE);
      detail_task.push(detail_item);

      for (let index = 1; index < pages.length; index++) {
        const { page, limit } = pages[index];
        // Pause between page requests.
        await new Promise((resolve) => setTimeout(resolve, PAGE_DELAY_MS));
        const pull_task_item = get_pull_data_task();
        await pull_task_item.runTask(finish_material_list, range, page, limit);
      }
      console.log("processTask over!");
    } catch (e) {
      console.error("PULL_DATA: error:", e);
    }
  }
  /**
   * Limits a title to `maxLength` UTF-16 code units.
   *
   * - Values that are not strings (including null/undefined) are returned
   *   unchanged; null/undefined previously raised a TypeError.
   * - The cut never separates a surrogate pair: when the last kept unit would be
   *   a lone high surrogate it is dropped too, so the result may be one unit
   *   shorter than `maxLength`.
   *
   * @param {*} title - Title to shorten.
   * @param {number} [maxLength=299] - Maximum length in UTF-16 code units.
   *   NOTE(review): 299 is presumably tied to the storage limit of the title
   *   field - confirm against the schema.
   * @returns {*} The shortened string, or `title` itself when nothing had to be cut.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (typeof title !== 'string' || title.length <= maxLength) {
      return title;
    }
    let end = maxLength;
    const lastUnit = title.charCodeAt(end - 1);
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) {
      // High surrogate whose low half would be cut off.
      end -= 1;
    }
    return title.substring(0, end);
  };
  /**
   * Checks an item of the material list against the configured minimums.
   *
   * The item is rejected as soon as one of its counters is below the matching
   * threshold in `FilterConfig`; the checks run in the order likes, favourites,
   * shares, comments.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Item with likeCount, favoriteCount, shareCount and commentCount, each holding `count`.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    // [counter on the item, threshold key in FilterConfig]
    const thresholdChecks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    return thresholdChecks.every(
      ([counterKey, minimumKey]) => !(origin_element[counterKey].count < FilterConfig[minimumKey])
    );
  };
  /**
   * Boots the worker once Redis is connected.
   *
   * Inside the connect callback it starts the message consumer, loads
   * "PullDataConfig", requests a token, restores today's progress record (if
   * one exists and nothing is cached yet), starts the scheduler, and finally
   * starts the detail-queue loop and the token loop.
   *
   * Changes compared with the previous version:
   *  - a missing or non-array "PullDataConfig" results in an empty schedule and
   *    an error log instead of a TypeError inside CMD.startScheduler;
   *  - any failure during start-up is caught and logged, and the two worker
   *    loops are started regardless (they handle their own errors), so a later
   *    config update message can still bring the scheduler up;
   *  - the token response is no longer dumped to the log;
   *  - a stored record without content is not cached as today's bucket.
   *
   * @returns {Promise<void>} Resolves once redis_help.connect has been called; the
   *   start-up steps themselves run inside the connect callback.
   */
  CMD.init = async function () {
    redis_help.connect(async () => {
      try {
        // Handles its own start-up errors, so it is not awaited here.
        startConsumer();

        const rawConfig = await redis_help.getKeyValue("PullDataConfig");
        const parsedConfig = JSON.parse(rawConfig);

        const response = await pullDataService.getToken();
        const hasToken = Boolean(response && response.data && response.data.token);
        console.log("token:", hasToken ? "ok" : "missing");

        if (Array.isArray(parsedConfig)) {
          _24HourRanges = parsedConfig;
        } else {
          console.error("PullDataConfig is missing or not an array, scheduling no jobs:", rawConfig);
          _24HourRanges = [];
        }

        // Restore today's progress so counters survive a restart.
        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        if (record_res != null && record_res.content != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error("PULL_DATA init error:", error);
      }
      processDetailTask();
      process_update_token();
    });
  };
  /**
   * Builds the built-in table of 24 hourly time ranges.
   *
   * Each entry has `name` (period label), `start` ("HH:MM"), `timestamp`,
   * `hour` and `timeFormat`. The table is a fixed snapshot: `timestamp` and
   * `timeFormat` are pinned to 12/10/2024 and do not follow the current date.
   *
   * NOTE(review): the entry for hour 20 starts at '20:45' while its `timestamp`
   * and `timeFormat` say 8:00 PM. This is reproduced exactly as it was; confirm
   * whether it is intentional.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function () {
    const HOUR_MS = 3600000;
    // `timestamp` of the hour-0 entry in the snapshot.
    const SNAPSHOT_BASE = 1733760000000;

    // Period label by hour of day.
    const periodNameFor = (hour) => {
      if (hour < 6) return '凌晨区间';
      if (hour < 9) return '早晨区间';
      if (hour < 12) return '上午区间';
      if (hour < 14) return '中午区间';
      if (hour < 18) return '下午区间';
      if (hour < 22) return '晚上区间';
      return '深夜区间';
    };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const hour12 = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      const start = hour === 20 ? '20:45' : `${String(hour).padStart(2, '0')}:00`;
      ranges.push({
        name: periodNameFor(hour),
        start: start,
        timestamp: SNAPSHOT_BASE + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${hour12}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a job object are skipped: schedule.scheduleJob can return
   * null for a spec it rejects, and calling cancel() on such an entry used to
   * throw here, which also blocked every later CMD.startScheduler call.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function () {
    for (const entry of jobs) {
      if (entry && entry.job) {
        entry.job.cancel();
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)builds the daily schedule from the configured time ranges.
   *
   * All existing jobs are cancelled first. For every range one job is scheduled
   * with the cron spec "<minute> <hour> * * *" derived from `timeRange.start`
   * ("HH:MM"). When it fires, `taskCallback` receives
   * `{ timeRange, executionTime }`; errors from the callback are logged.
   *
   * Ranges whose `start` does not yield an hour in 0-23 and a minute in 0-59,
   * and ranges for which the scheduler returns no job, are skipped with an
   * error log instead of being stored as unusable entries. A non-array range
   * list is treated as empty.
   *
   * @param {function({timeRange: object, executionTime: Date}): (Promise<void>|void)} taskCallback
   * @returns {void}
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    // Create one daily job per time range.
    ranges.forEach((timeRange) => {
      const parts = String(timeRange && timeRange.start).split(':');
      const hour = parseInt(parts[0], 10);
      const minute = parseInt(parts[1], 10);
      const isValidTime =
        Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('startScheduler: skipping time range with invalid start:', timeRange);
        return;
      }

      // Runs every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('startScheduler: scheduler rejected time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Boot the worker when the module is loaded. CMD.init is async, so a failure
  // surfaces as a rejected promise; log it instead of leaving it unhandled.
  CMD.init().catch((error) => {
    console.error('PULL_DATA init failed:', error);
  });