PULL_DATA.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. const CMD = {}
  2. const time_count = 1000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const axios = require('axios')
  6. const config = require('../etc/config.json');
  7. const DailyRecordManager = require('./daily_records');
  8. const schedule = require('node-schedule');
  9. const manager = new DailyRecordManager();
  10. const rabbitMq = require('../src/mq/rabbit-mq');
  11. const helper = require('../src/helper');
  12. const PullDataService = require('../src/PullDataService');
  // Mutable module-level state shared by the scheduler callback and the polling loops below.
  let cur_page = 0;                 // page index handed to get_novel_material_list; -1 stops paging
  let _24HourRanges = [];           // time-range entries CMD.startScheduler turns into jobs
  let jobs = [];                    // { job, timeRange } pairs created by CMD.startScheduler
  let cur_day_data = new Map();     // per-day progress objects, keyed by helper.getLocalDate()
  let cur_timeRange = null;         // range most recently started by taskCallback
  let pullDataService = new PullDataService();
  let detail_task = [];             // queue filled by processTask, drained by processDetailTask
  /**
   * Message handler passed to the RabbitMQ consumer: reloads "PullDataConfig"
   * from Redis and rebuilds the schedule from it.
   *
   * The stored value is validated BEFORE any module state is touched. Without
   * that, a missing key (JSON.parse(null) === null) or a non-array payload was
   * assigned to _24HourRanges, and CMD.startScheduler then cancelled every
   * running job before failing on `.forEach`.
   *
   * @param {*} msg - The delivered message; its content is not inspected.
   * @returns {Promise<void>}
   * @throws {SyntaxError} When the stored value is not valid JSON.
   * @throws {Error} When the stored value does not parse to an array.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      // Still reject (as the original did) so the consumer sees the failure,
      // but leave the current schedule untouched.
      throw new Error('PullDataConfig must be a JSON array of time ranges');
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Scheduler callback: prepares today's progress record for the time range
   * that just fired, persists it, resets the paging state and starts pulling.
   *
   * Changes from the previous version:
   *  - entries for earlier days are removed from cur_day_data instead of being
   *    overwritten with `{}`, so the map no longer grows by one key per day;
   *  - a missing or non-object value stored under today's key is replaced with
   *    a fresh object instead of causing a TypeError on property access.
   *
   * @param {{timeRange: object, executionTime: Date}} context - timeRange must
   *   provide `name`, `start` and `interval_minute`; it is mutated
   *   (`finish_count` is reset to 0) and becomes the module's cur_timeRange.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Only today's progress is ever read, so drop every other day.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }

    let dayRecord = cur_day_data.get(today);
    if (dayRecord == null || typeof dayRecord !== 'object') {
      dayRecord = {};
      cur_day_data.set(today, dayRecord);
    }
    if (!dayRecord[timeRange.start]) {
      dayRecord[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", dayRecord, today);

    // Update today's stored record when there is one, otherwise create it.
    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      const result = await manager.updateRecord(today, dayRecord);
      console.log("更新记录结果:", result);
    } else {
      const result = await manager.createRecord(today, dayRecord);
      console.log("创建新记录结果:", result);
    }

    // Restart paging from the first page for this range.
    cur_page = 0;
    cur_timeRange = timeRange;
    cur_timeRange.finish_count = 0;
    await processTask(timeRange);
  };
  /**
   * Start the consumer: registers messageHandler through
   * rabbitMq.consumerDirectMsg. A failure is logged and not rethrown, so the
   * returned promise always resolves.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchange = "exchange_update_pull_config";
    const key = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchange, key);
    } catch (err) {
      console.error('启动消费者失败:', err);
    }
  }
  /**
   * Worker loop for the detail queue. Each run takes at most one task from
   * detail_task, fetches the details for its material ids, records progress,
   * stores every element that has anchor info, and then re-arms itself to run
   * again in 500 ms (also after a failure).
   *
   * Changes from the previous version: bookkeeping and per-element problems no
   * longer discard the remaining elements of a batch.
   *  - Progress bookkeeping is best-effort: a missing day record / slot (for
   *    example after the local date has changed) or a failing updateRecord is
   *    logged instead of aborting before the insert loop.
   *  - A response without `data.list` is treated as an empty list.
   *  - An element with no cached counters, or whose insert fails, is logged and
   *    skipped; the loop continues with the next element.
   *
   * @returns {Promise<void>}
   */
  async function processDetailTask() {
    try {
      if (detail_task.length === 0) {
        return; // nothing queued; the finally block re-arms the timer
      }

      const { materialId_list, materialId_data_list } = detail_task.shift();
      const response = await pullDataService.get_detail(materialId_list);
      if (!response.success) {
        console.log("get_detail:", response);
        console.error("processDetailTask:", { msg: response, timeRange: cur_timeRange, fun: "get_detail" });
        return;
      }

      const detailList = response.data && Array.isArray(response.data.list) ? response.data.list : [];

      if (cur_timeRange != null) {
        cur_timeRange.finish_count += detailList.length;
        try {
          const today = helper.getLocalDate();
          const dayRecord = cur_day_data.get(today);
          const slot = dayRecord ? dayRecord[cur_timeRange.start] : undefined;
          if (slot) {
            slot.finish_count = cur_timeRange.finish_count;
          }
          if (dayRecord) {
            await manager.updateRecord(today, dayRecord);
          }
        } catch (error) {
          // Progress tracking must not cost us the fetched rows.
          console.error("processDetailTask:", error);
        }
      }

      for (let index = 0; index < detailList.length; index++) {
        const element = detailList[index];
        if (!element.hasAnchorInfo) {
          continue;
        }
        const info = materialId_data_list[element.materialId];
        if (!info) {
          console.error("processDetailTask:", new Error(`no cached counters for materialId ${element.materialId}`));
          continue;
        }
        try {
          await origin_data_controllers.createOriginData({
            video_id: element.awemeId,
            materialId: element.materialId,
            video_link: element.awemeUrl,
            title: CMD.subTitle(element.title),
            publish_time: new Date(element.publishTime),
            kepp_num: info.kepp_num,
            comment_num: info.comment_num,
            like_num: info.like_num,
            shared_num: info.shared_num,
            is_guajian: 1,
            guajian_link: element.landingUrl,
            status: 0
          });
        } catch (error) {
          console.error("processDetailTask:", error);
        }
      }
    } catch (error) {
      console.error("processDetailTask:", error);
    } finally {
      global.setTimeout(processDetailTask, 500);
    }
  }
  /**
   * Pull one page of the material list for the active time range, record the
   * paging progress, and queue the ids that still need details. While cur_page
   * is not -1 the function increments cur_page and re-arms itself after
   * time_count milliseconds.
   *
   * Changes from the previous version:
   *  - The timer re-entry passes no argument, which used to switch off the
   *    per-page bookkeeping after the first page; it now falls back to
   *    cur_timeRange.
   *  - A missing day record / slot or a failing updateRecord is logged instead
   *    of throwing and losing the page.
   *  - When `count - finish_count` is zero or negative, paging stops instead of
   *    requesting a page of non-positive size.
   *  - The per-id counter lookup is a prototype-less object instead of an Array
   *    with arbitrary keys, and a page with no new ids no longer enqueues an
   *    empty detail task.
   *  - Stop conditions are handled with early returns instead of thrown
   *    strings/objects; what gets logged is unchanged.
   *
   * NOTE(review): two things are intentionally left as they were. If
   * taskCallback starts a new range while a timer from the previous run is
   * still pending, both chains advance the shared cur_page. And after an
   * unexpected error the finally block still advances to the next page.
   *
   * @param {object} [timeRange] - Range whose `start` selects the progress slot;
   *   defaults to cur_timeRange when omitted.
   * @returns {Promise<void>}
   */
  async function processTask(timeRange) {
    const range = timeRange != null ? timeRange : cur_timeRange;
    // Ends paging (cur_page = -1) and logs the reason the way the catch block does.
    const stopPaging = (reason) => {
      cur_page = -1;
      console.error("PULL_DATA: error:", reason);
    };

    try {
      if (cur_page == -1) {
        stopPaging("没有数据了");
        return;
      }

      await pullDataService.getToken();
      const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));

      let size = 500;
      if (cur_page >= 1 && cur_timeRange != null) {
        const remaining = cur_timeRange.count - cur_timeRange.finish_count;
        if (remaining <= 0) {
          stopPaging("没有数据了");
          return;
        }
        if (remaining < 500) {
          size = remaining;
        }
      }

      const response = await pullDataService.get_novel_material_list(cur_timeRange, cur_page, size);
      if (!response.success || response.data == null) {
        stopPaging({ msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
        return;
      }
      const pageItems = response.data.list;
      if (pageItems == null || pageItems.length <= 0) {
        stopPaging("没有数据了");
        return;
      }

      if (cur_timeRange != null) {
        cur_timeRange.count = response.data.count;
        cur_timeRange.cur_page = cur_page;
      }
      if (range != null) {
        try {
          const today = helper.getLocalDate();
          console.log("response.data:", response.data, today);
          const dayRecord = cur_day_data.get(today);
          const slot = dayRecord ? dayRecord[range.start] : undefined;
          if (slot) {
            console.log("response.data.count:", response.data.count);
            console.log("(cur_day_data[today][timeRange.start]:", slot);
            slot.count = response.data.count;
            slot.cur_page = cur_page;
          } else {
            console.log("cur_day_data空");
          }
          if (dayRecord) {
            await manager.updateRecord(today, dayRecord);
          }
        } catch (error) {
          // Progress tracking is best-effort; keep processing the page.
          console.error("PULL_DATA: error:", error);
        }
      }

      const materialId_list = [];
      const materialId_data_list = Object.create(null);
      for (let index = 0; index < pageItems.length; index++) {
        const origin_element = pageItems[index];
        const existing = await origin_data_controllers.getOriginData({
          materialId: origin_element.materialId
        });
        if (existing.success) {
          continue; // already looked up successfully: nothing to queue
        }
        if (CMD.isRight(FilterConfig, origin_element)) {
          materialId_list.push(origin_element.materialId);
          materialId_data_list[origin_element.materialId] = {
            kepp_num: origin_element.favoriteCount.count,
            comment_num: origin_element.commentCount.count,
            like_num: origin_element.likeCount.count,
            shared_num: origin_element.shareCount.count
          };
        }
      }

      if (materialId_list.length > 0) {
        detail_task.push({ materialId_list: materialId_list, materialId_data_list: materialId_data_list });
      }
    } catch (e) {
      console.error("PULL_DATA: error:", e);
    } finally {
      if (cur_page != -1) {
        cur_page++;
        global.setTimeout(processTask, time_count);
      }
    }
  }
  /**
   * Truncate a title to at most `maxLength` UTF-16 code units.
   *
   * Changes from the previous version: a null/undefined title yields '' instead
   * of throwing, other non-string values are converted with String(), and the
   * cut never ends on the first half of a surrogate pair.
   *
   * @param {*} title - Title to shorten.
   * @param {number} [maxLength=299] - Maximum length of the returned string.
   * @returns {string} The title, shortened when it was longer than maxLength.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (title == null) {
      return '';
    }
    const text = typeof title === 'string' ? title : String(title);
    if (text.length <= maxLength) {
      return text;
    }
    let end = maxLength;
    const lastKept = text.charCodeAt(end - 1);
    if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
      // The cut would keep a high surrogate without its partner: drop it too.
      end -= 1;
    }
    return text.substring(0, end);
  };
  /**
   * Decide whether an element meets every configured minimum.
   *
   * Each counter on the element is compared with its threshold in FilterConfig,
   * in the order like, favorite, share, comment; the first counter that is
   * below its threshold rejects the element.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Element carrying likeCount, favoriteCount,
   *   shareCount and commentCount objects, each with a `count`.
   * @returns {boolean} false as soon as one counter is below its threshold, otherwise true.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    const rules = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num'],
    ];
    for (const [counterField, thresholdKey] of rules) {
      if (origin_element[counterField].count < FilterConfig[thresholdKey]) {
        return false;
      }
    }
    return true;
  };
  /**
   * Module start-up. Connects to Redis and, from the connect callback, starts
   * the message consumer, loads the schedule config, fetches a token, restores
   * today's progress record, starts the scheduler and starts the detail worker.
   *
   * Changes from the previous version: the async callback used to have no error
   * handling, so any failing step produced an unhandled rejection and skipped
   * everything after it. Each step is now attempted on its own and a failure is
   * logged.
   *  - A config that does not parse to an array is rejected and the current
   *    _24HourRanges value is kept.
   *  - A token failure is not fatal (the response was only ever logged here).
   *  - Record content that is missing or not an object is not cached.
   *
   * @returns {Promise<void>} Resolves once redis_help.connect has been called;
   *   it does not wait for the connect callback to finish.
   */
  CMD.init = async function () {
    // Runs one start-up step and logs, rather than propagates, its failure.
    const attempt = async (step, fn) => {
      try {
        await fn();
      } catch (error) {
        console.error(`PULL_DATA init: ${step} failed:`, error);
      }
    };

    redis_help.connect(async () => {
      startConsumer();

      await attempt('load PullDataConfig', async () => {
        const parsedConfig = JSON.parse(await redis_help.getKeyValue("PullDataConfig"));
        if (!Array.isArray(parsedConfig)) {
          throw new Error('PullDataConfig must be a JSON array of time ranges');
        }
        _24HourRanges = parsedConfig;
      });

      await attempt('getToken', async () => {
        const response = await pullDataService.getToken();
        console.log("token:", response);
      });

      await attempt('restore daily record', async () => {
        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        const content = record_res != null ? record_res.content : null;
        if (content != null && typeof content === 'object' && !cur_day_data.has(key)) {
          cur_day_data.set(key, content);
        }
      });

      await attempt('start scheduler', async () => {
        CMD.startScheduler(taskCallback);
      });

      // The worker only polls a queue, so it is started regardless of the steps above.
      processDetailTask();
    });
  };
  /**
   * Build the list of 24 hourly time ranges.
   *
   * Every call returns a new array of new objects with the fields `name`,
   * `start`, `timestamp`, `hour` and `timeFormat`. The timestamps and
   * timeFormat strings are fixed values (the strings are all dated 12/10/2024);
   * they are not derived from the current date.
   *
   * NOTE(review): the entry for hour 20 has start '20:45' while its timestamp
   * and timeFormat sit on the full hour like every other entry. Confirm whether
   * that is intentional.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function () {
    const FIRST_TIMESTAMP = 1733760000000; // value of the hour-0 entry
    const HOUR_MS = 3600000;

    // Period label for an hour of the day.
    const periodNameFor = (hour) => {
      if (hour < 6) return '凌晨区间';
      if (hour < 9) return '早晨区间';
      if (hour < 12) return '上午区间';
      if (hour < 14) return '中午区间';
      if (hour < 18) return '下午区间';
      if (hour < 22) return '晚上区间';
      return '深夜区间';
    };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const clockHour = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: periodNameFor(hour),
        start: hour === 20 ? '20:45' : `${String(hour).padStart(2, '0')}:00`,
        timestamp: FIRST_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancel every scheduled job and empty the job list.
   *
   * Entries without a usable job object are skipped. Previously a single such
   * entry made `job.cancel()` throw before `jobs` was reset, which also broke
   * every later CMD.startScheduler call because it stops the scheduler first.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function () {
    for (const entry of jobs) {
      const job = entry ? entry.job : null;
      if (job && typeof job.cancel === 'function') {
        job.cancel();
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * Replace the current schedule: stops all existing jobs, then creates one
   * daily job per entry of _24HourRanges, firing at the entry's `start`
   * ("HH:mm") time.
   *
   * Changes from the previous version:
   *  - `start` is parsed with an explicit radix and validated; an entry whose
   *    hour/minute is missing or out of range is logged and skipped.
   *  - A falsy return value from schedule.scheduleJob is not stored, so a later
   *    CMD.stopScheduler never sees an entry it cannot cancel.
   *  - A non-array _24HourRanges is treated as an empty schedule.
   *
   * @param {function({timeRange: object, executionTime: Date}): Promise<*>} taskCallback
   *   Invoked each time a job fires; errors it throws are caught and logged.
   * @returns {void}
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    ranges.forEach((timeRange) => {
      const parts = timeRange && typeof timeRange.start === 'string' ? timeRange.start.split(':') : [];
      const hour = Number.parseInt(parts[0], 10);
      const minute = Number.parseInt(parts[1], 10);
      const isValidTime = Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('startScheduler: skipping time range with invalid start:', timeRange);
        return;
      }

      // Cron-style rule: every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('startScheduler: job could not be scheduled for:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Start the module. init() is async, so attach a handler: a rejection (for
  // example redis_help.connect throwing) is logged instead of becoming an
  // unhandled promise rejection.
  CMD.init().catch((error) => {
    console.error('PULL_DATA init failed:', error);
  });