// PULL_DATA.js
  // Namespace object that the scheduler and helper functions below are attached to.
  const CMD = {};
  // Not referenced in the visible part of this file.
  const time_count = 1000;

  // Dependencies (load order kept as-is).
  const redis_help = require('../src/use_redis');
  const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  const axios = require('axios');
  const config = require('../etc/config.json');
  const DailyRecordManager = require('./daily_records');
  const schedule = require('node-schedule');
  const manager = new DailyRecordManager();
  const rabbitMq = require('../src/mq/rabbit-mq');
  const helper = require('../src/helper');
  const PullDataService = require('../src/PullDataService');
  const pull_data_task_item = require('./pull_data_task_item');

  // Time-range definitions the scheduler iterates over; replaced whenever the
  // "PullDataConfig" Redis key is (re)loaded.
  let _24HourRanges = [];
  // Scheduled entries of the form { job, timeRange }; reset by CMD.stopScheduler.
  let jobs = [];
  // Per-day progress records, keyed by the value of helper.getLocalDate().
  const cur_day_data = new Map();
  // Pool of pull_data_task_item instances handed out by get_pull_data_task
  // (an array despite the "_map" suffix).
  const pull_data_task_map = [];
  // Time range currently being processed; assigned by taskCallback.
  let cur_timeRange = null;
  const pullDataService = new PullDataService(redis_help);
  // Items appended by finish_material_list.
  const detail_task = [];
  // Queue of { page, limit } entries consumed by process_novel_material_list.
  let task_params = [];
  // Page index passed to get_novel_material_list; processTask sets it to -1 to stop paging.
  let cur_page = 0;
  /**
   * Message handler registered with the RabbitMQ consumer.
   *
   * Re-reads the "PullDataConfig" key from Redis and restarts the scheduler with
   * the new time ranges. The configuration is validated BEFORE any state is
   * touched, so a missing or malformed value no longer cancels the running jobs
   * and leaves the scheduler empty.
   *
   * @param {*} msg - The delivered message; its content is not inspected.
   * @returns {Promise<void>}
   * @throws {Error} When the stored configuration is not a JSON array (Redis and
   *   JSON.parse errors propagate to the caller unchanged).
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      // CMD.startScheduler iterates the ranges with forEach; anything but an
      // array would make it throw after the old jobs were already cancelled.
      throw new Error("messageHandler: PullDataConfig is not a JSON array, keeping the current schedule");
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Returns a task item that is not currently running.
   *
   * The pool is scanned in insertion order and the first item whose
   * isRunTask() is falsy is reused; when every item is busy (or the pool is
   * empty) a new pull_data_task_item is created, added to the pool and returned.
   *
   * @returns {object} A pull_data_task_item instance.
   */
  const get_pull_data_task = function () {
    const idleTask = pull_data_task_map.find((task) => !task.isRunTask());
    if (idleTask !== undefined) {
      return idleTask;
    }
    const freshTask = new pull_data_task_item(redis_help);
    pull_data_task_map.push(freshTask);
    return freshTask;
  };
  /**
   * Scheduler callback: starts a pull run for one time range.
   *
   * Makes sure today's in-memory progress record has an entry for the range,
   * persists that record through the DailyRecordManager (update when a record
   * for today exists, create otherwise), resets the paging state and kicks off
   * processTask().
   *
   * @param {{timeRange: object, executionTime: Date}} context - The range that
   *   fired and the time it fired at.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Drop records of previous days. They used to be overwritten with an empty
    // object, which kept one dead key per day in the Map forever.
    Array.from(cur_day_data.keys()).forEach((day) => {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    });

    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }
    const dayRecord = cur_day_data.get(today);
    if (!dayRecord[timeRange.start]) {
      dayRecord[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", dayRecord, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, dayRecord);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, dayRecord);
      console.log("创建新记录结果:", result);
    }

    // Reset the per-run state that the page/detail processors accumulate into.
    cur_timeRange = timeRange;
    cur_timeRange.finish_count = 0;
    // Initialised here so that the later "count + list.length" does not yield NaN.
    cur_timeRange.count = 0;
    cur_page = 0;
    await processTask();
  };
  /**
   * Starts the message consumer.
   *
   * Registers messageHandler through rabbitMq.consumerDirectMsg. A failure is
   * logged and not rethrown, so the returned promise always resolves.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    // NOTE(review): the two strings are presumably the exchange name and the
    // routing key - confirm against the rabbit-mq helper. They are passed
    // through unchanged (including the spelling of the second one).
    const consumerArgs = ["exchange_update_pull_config", "updatePullConig"];
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, ...consumerArgs);
    } catch (error) {
      console.error('启动消费者失败:', error);
    }
  }
  /**
   * Fetches details for one batch of material ids and stores the entries that
   * carry anchor info.
   *
   * Steps: call pullDataService.get_detail with the batch's id list, add the
   * number of returned entries to the current range's finish_count (persisting
   * today's record when it exists), then create one origin-data row per
   * returned entry whose hasAnchorInfo flag is set.
   *
   * Every error is caught and logged; the returned promise never rejects.
   *
   * @param {?{materialId_list: Array, materialId_data_list: Object}} task_item -
   *   Batch produced by process_material_list; null/undefined is skipped.
   * @returns {Promise<void>}
   */
  async function processDetailTask(task_item) {
    try {
      console.log(" start processDetailTask");
      if (task_item == null) {
        // process_material_list returns null when a page could not be processed.
        console.log("processDetailTask: empty task item, nothing to do");
        return;
      }

      const materialId_list = task_item.materialId_list;
      const materialId_data_list = task_item.materialId_data_list || {};

      const response = await pullDataService.get_detail(materialId_list);
      if (!response.success) {
        console.log("get_detail:", response);
        // Same diagnostic fields as before, but attached to a real Error so the
        // log also carries a stack trace.
        throw Object.assign(new Error("get_detail failed"), {
          msg: response,
          timeRange: cur_timeRange,
          fun: "get_detail",
          materialId_list: materialId_list
        });
      }

      const detailList = response.data && Array.isArray(response.data.list)
        ? response.data.list
        : [];

      if (cur_timeRange != null) {
        cur_timeRange.finish_count += detailList.length;
        const today = helper.getLocalDate();
        const dayRecord = cur_day_data.get(today);
        const rangeRecord = dayRecord ? dayRecord[cur_timeRange.start] : undefined;
        if (rangeRecord) {
          rangeRecord.finish_count = cur_timeRange.finish_count;
          await manager.updateRecord(today, dayRecord);
        } else {
          // Happens e.g. when the date changed while the run was in progress.
          // Progress bookkeeping is skipped, but the fetched data is still stored.
          console.log("processDetailTask: no progress record for", today, cur_timeRange.start);
        }
      }

      for (const element of detailList) {
        if (!element.hasAnchorInfo) {
          continue;
        }
        const info = materialId_data_list[element.materialId];
        if (info == null) {
          // Without the list-level statistics the row cannot be built; skip this
          // entry instead of aborting the rest of the batch.
          console.error("processDetailTask: no list data for materialId:", element.materialId);
          continue;
        }
        await origin_data_controllers.createOriginData({
          video_id: element.awemeId,
          materialId: element.materialId,
          video_link: element.awemeUrl,
          title: CMD.subTitle(element.title),
          publish_time: new Date(element.publishTime),
          kepp_num: info.kepp_num,
          comment_num: info.comment_num,
          like_num: info.like_num,
          shared_num: info.shared_num,
          is_guajian: 1,
          guajian_link: element.landingUrl,
          status: 0,
          createTime: info.createTime,
          updateTime: info.updateTime
        });
      }
      console.log(" finish processDetailTask");
    } catch (error) {
      console.error("processDetailTask:", error);
    }
  }
  /**
   * Processes one page of the material-list response.
   *
   * Records paging progress for the current time range, then looks up every
   * listed material in the origin-data store; for materials that are not stored
   * yet, CMD.isRight is evaluated against the "FilterConfig" thresholds from
   * Redis and the id of each passing material is logged.
   *
   * NOTE: the code that collected passing ids into the returned lists is
   * disabled (see the commented block in the loop), so materialId_list and
   * materialId_data_list are always returned empty by this version.
   *
   * @param {object} response - Result of pullDataService.get_novel_material_list.
   * @param {number} page - Page index the response belongs to.
   * @returns {Promise<?{materialId_list: Array, materialId_data_list: Array, count: number}>}
   *   The batch descriptor, or null when the response is unusable, the page is
   *   empty, or an error occurred (the reason is logged).
   */
  async function process_material_list(response, page) {
    try {
      const rawFilterConfig = await redis_help.getKeyValue("FilterConfig");
      const FilterConfig = JSON.parse(rawFilterConfig);
      const materialId_list = [];
      const materialId_data_list = [];

      // Expected outcomes are logged and reported with null directly instead of
      // being thrown as strings / plain objects and caught a few lines below.
      if (!response.success || response.data == null) {
        console.log("materialId_list:", { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
        return null;
      }
      const pageItems = response.data.list;
      if (pageItems == null || pageItems.length <= 0) {
        console.log("materialId_list:", "没有数据了");
        return null;
      }

      if (cur_timeRange != null) {
        const today = helper.getLocalDate();
        const dayRecord = cur_day_data.get(today);
        const rangeRecord = dayRecord ? dayRecord[cur_timeRange.start] : undefined;
        if (rangeRecord) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", rangeRecord);
          // count may never have been initialised; undefined + n would be NaN.
          const previousCount = Number.isFinite(cur_timeRange.count) ? cur_timeRange.count : 0;
          cur_timeRange.count = previousCount + pageItems.length;
          cur_timeRange.cur_page = page;
          rangeRecord.count = response.data.count;
          rangeRecord.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        if (dayRecord) {
          // Awaited so a rejection cannot become an unhandled rejection, but
          // isolated so that a bookkeeping failure does not drop the page.
          try {
            await manager.updateRecord(today, dayRecord);
          } catch (updateError) {
            console.error("process_material_list: updateRecord failed:", updateError);
          }
        }
      }

      console.log("materialId_list:", pageItems.length);
      for (const origin_element of pageItems) {
        const result = await origin_data_controllers.getOriginData({
          materialId: origin_element.materialId
        });
        if (result.success) {
          // Already stored. Refreshing its counters is currently disabled:
          // await origin_data_controllers.updateOriginData({id:result.data.id},
          // {
          //   kepp_num:origin_element.favoriteCount.count,
          //   comment_num:origin_element.commentCount.count,
          //   like_num:origin_element.likeCount.count,
          //   shared_num:origin_element.shareCount.count
          // })
          continue;
        }
        if (CMD.isRight(FilterConfig, origin_element)) {
          // Collecting the id for the detail step is currently disabled:
          // materialId_list.push(origin_element.materialId)
          // materialId_data_list[origin_element.materialId] = {
          //   kepp_num:origin_element.favoriteCount.count,
          //   comment_num:origin_element.commentCount.count,
          //   like_num:origin_element.likeCount.count,
          //   shared_num:origin_element.shareCount.count
          // }
          console.log("materialId:", origin_element.materialId);
        }
      }
      console.log("materialId_list:", materialId_list);
      return {
        materialId_list: materialId_list,
        materialId_data_list: materialId_data_list,
        count: pageItems.length
      };
    } catch (e) {
      console.log("materialId_list:", e);
      return null;
    }
  }
  /**
   * Completion callback handed to pull_data_task_item.runTask.
   *
   * A failed response is only logged. A successful one is run through
   * process_material_list and the resulting batch is appended to detail_task.
   * Pages for which process_material_list returns null are no longer pushed, so
   * the queue never contains null entries.
   *
   * @param {object} response - Material-list response for one page.
   * @param {number} page - Page index of that response.
   * @returns {Promise<void>}
   */
  async function finish_material_list(response, page) {
    if (!response.success) {
      console.error("finish_material_list:", response);
      return;
    }
    const detail_item = await process_material_list(response, page, null);
    if (detail_item != null) {
      detail_task.push(detail_item);
    }
  }
  /**
   * Requests a token through pullDataService.getToken() and logs the `token`
   * field of the response's `data`. Any failure is logged and not rethrown.
   *
   * The periodic self-rescheduling that used to follow is disabled.
   *
   * @returns {Promise<void>}
   */
  async function process_update_token() {
    try {
      const tokenResponse = await pullDataService.getToken();
      const tokenPayload = tokenResponse.data;
      console.log("token:", tokenPayload.token);
    } catch (e) {
      console.error("process_update_token:", e);
    }
  }
  /**
   * Drains the task_params queue one entry at a time.
   *
   * Takes the next { page, limit } entry, runs it on an idle pull task item
   * (with finish_material_list as the completion callback), waits 200 ms, and
   * re-schedules itself after another 200 ms while entries remain.
   *
   * Errors are logged (they used to be swallowed silently) and do not stop the
   * draining of the remaining entries.
   *
   * @returns {Promise<void>}
   */
  async function process_novel_material_list() {
    try {
      if (task_params.length > 0) {
        const element = task_params.shift();
        const pull_task_item = get_pull_data_task();
        await pull_task_item.runTask(
          finish_material_list,
          cur_timeRange,
          element.page,
          element.limit
        );
        await new Promise((resolve) => setTimeout(resolve, 200));
      }
    } catch (e) {
      console.error("process_novel_material_list:", e);
    } finally {
      if (task_params.length > 0) {
        setTimeout(process_novel_material_list, 200);
      }
    }
  }
  // Number of back-to-back thrown errors after which a pull run is abandoned.
  const PROCESS_TASK_MAX_CONSECUTIVE_FAILURES = 3;
  // Consecutive thrown errors seen by processTask in the current run.
  let processTaskConsecutiveFailures = 0;

  /**
   * Pulls one page (500 entries) of the material list for cur_timeRange and
   * processes it, then schedules itself one second later for the next page.
   *
   * Paging stops (cur_page is set to -1) when the service reports failure or
   * returns no list / an empty list. A thrown error - for example a network
   * failure - retries the SAME page, at most
   * PROCESS_TASK_MAX_CONSECUTIVE_FAILURES times in a row; previously such an
   * error skipped the page and kept the loop running every second without end.
   *
   * NOTE(review): taskCallback resets cur_page while a previous run may still
   * have a timer pending, so two runs can overlap - confirm whether that is
   * acceptable.
   *
   * @returns {Promise<void>} Never rejects; all errors are logged.
   */
  async function processTask() {
    let continuePaging = false;
    let advancePage = false;
    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, cur_page, 500);
      const pageItems = response && response.success && response.data
        ? response.data.list
        : null;
      if (!Array.isArray(pageItems) || pageItems.length <= 0) {
        // Failure response or end of data: report it and stop paging.
        console.error("PULL_DATA: error:", response);
      } else {
        const detail_item = await process_material_list(response, cur_page, cur_timeRange);
        console.log("detail_item:", detail_item);
        if (detail_item != null) {
          await processDetailTask(detail_item);
        }
        console.log("processTask over!");
        processTaskConsecutiveFailures = 0;
        continuePaging = true;
        advancePage = true;
      }
    } catch (e) {
      console.error("PULL_DATA: error:", e);
      processTaskConsecutiveFailures += 1;
      // Nothing of this page was processed yet, so retrying it is safe.
      continuePaging = processTaskConsecutiveFailures < PROCESS_TASK_MAX_CONSECUTIVE_FAILURES;
    }

    if (!continuePaging) {
      cur_page = -1;
      processTaskConsecutiveFailures = 0;
      return;
    }
    if (advancePage) {
      cur_page++;
    }
    global.setTimeout(processTask, 1000);
  }
  /**
   * Truncates a title to at most `maxLength` UTF-16 code units.
   *
   * @param {*} title - The title to shorten. Values that are not strings
   *   (null, undefined, numbers, ...) are returned unchanged instead of
   *   raising a TypeError.
   * @param {number} [maxLength=299] - Maximum length of the result.
   * @returns {*} The title itself when it fits, otherwise its first
   *   `maxLength` code units.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (typeof title !== 'string') {
      return title;
    }
    return title.length > maxLength ? title.substring(0, maxLength) : title;
  };
  /**
   * Tells whether a material passes every configured popularity threshold.
   *
   * A material is rejected as soon as one of its counters is strictly below
   * the matching threshold:
   *   likeCount.count     vs FilterConfig.like_num
   *   favoriteCount.count vs FilterConfig.kepp_num
   *   shareCount.count    vs FilterConfig.shared_num
   *   commentCount.count  vs FilterConfig.comment_num
   *
   * A missing counter is treated as 0 and a missing threshold (or a missing
   * config object) as "no limit"; previously both cases raised a TypeError.
   *
   * @param {?object} FilterConfig - Threshold values.
   * @param {?object} origin_element - Material entry holding the counter objects.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    const thresholds = FilterConfig || {};
    const material = origin_element || {};
    const countOf = (counterField) => {
      const counter = material[counterField];
      return counter != null && counter.count != null ? counter.count : 0;
    };

    console.log("origin_element.likeCount.count:", countOf('likeCount'), thresholds.like_num);

    const checks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    // Written as !(a < b) rather than a >= b on purpose: with an undefined
    // threshold "a < b" is false, so the check passes, as it always did.
    return checks.every(([counterField, thresholdField]) =>
      !(countOf(counterField) < thresholds[thresholdField]));
  };
  /**
   * Boots the module once the Redis connection is available.
   *
   * Inside the connect callback it starts the message consumer, loads the
   * "PullDataConfig" time ranges from Redis, requests a token, restores
   * today's progress record into the in-memory cache (when one is stored and
   * none is cached yet) and starts the scheduler.
   *
   * Errors raised inside the callback are logged here; they used to surface as
   * unhandled promise rejections because nothing awaits the callback.
   *
   * @returns {Promise<void>} Resolves as soon as redis_help.connect has been
   *   called; it does not wait for the callback to finish.
   */
  CMD.init = async function () {
    redis_help.connect(async () => {
      try {
        // Not awaited: startConsumer logs its own failures and never rejects.
        startConsumer();

        const rawConfig = await redis_help.getKeyValue("PullDataConfig");
        const pullDataConfig = JSON.parse(rawConfig);

        // The token used to be requested a second time right before the
        // scheduler start; one request is enough.
        // NOTE(review): this writes the whole token response to the log.
        const response = await pullDataService.getToken();
        console.log("token:", response);

        if (Array.isArray(pullDataConfig)) {
          _24HourRanges = pullDataConfig;
        } else {
          // Keep the current ranges; the consumer can still deliver a valid
          // configuration later.
          console.error("CMD.init: PullDataConfig is not a JSON array:", pullDataConfig);
        }

        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        if (record_res != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error("CMD.init:", error);
      }
    });
  };
  /**
   * Builds the fixed table of 24 hourly time ranges.
   *
   * Every entry has the shape { name, start, timestamp, hour, timeFormat }.
   * The values are constants: timestamps start at 1733760000000 for hour 0 and
   * advance by one hour per entry, and timeFormat is the matching
   * "12/10/2024, h:00:00 AM|PM" text.
   *
   * NOTE(review): the entry for hour 20 starts at '20:45' while its timestamp
   * and timeFormat describe 8:00 PM. This looks like a leftover from testing
   * but is reproduced unchanged - confirm before relying on it.
   *
   * @returns {Array<object>} A new array of 24 entries, ordered by hour.
   */
  CMD.generate24HourRanges = function () {
    const FIRST_TIMESTAMP = 1733760000000;
    const HOUR_MS = 3600000;

    // Upper bound (exclusive) of each named period, in ascending order.
    const periods = [
      [6, '凌晨区间'],
      [9, '早晨区间'],
      [12, '上午区间'],
      [14, '中午区间'],
      [18, '下午区间'],
      [22, '晚上区间'],
      [24, '深夜区间']
    ];
    const periodNameOf = (hour) => periods.find(([endHour]) => hour < endHour)[1];

    const table = [];
    for (let hour = 0; hour < 24; hour += 1) {
      const clockHour = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      table.push({
        name: periodNameOf(hour),
        start: hour === 20 ? '20:45' : `${String(hour).padStart(2, '0')}:00`,
        timestamp: FIRST_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return table;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries whose `job` is missing are skipped: schedule.scheduleJob returns
   * null for a spec it cannot parse, and calling cancel() on such an entry
   * used to throw, which made every later scheduler restart fail as well.
   */
  CMD.stopScheduler = function () {
    jobs.forEach(({ job }) => {
      if (job && typeof job.cancel === 'function') {
        job.cancel();
      }
    });
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)starts the scheduler: cancels all existing jobs, then creates one daily
   * job per entry of _24HourRanges, firing at the hour and minute given by the
   * entry's "HH:MM" `start` field.
   *
   * Entries whose `start` cannot be parsed into a valid hour (0-23) and minute
   * (0-59), and entries the scheduling library rejects, are reported and
   * skipped; they used to be stored as null jobs and counted as scheduled.
   *
   * @param {function({timeRange: object, executionTime: Date}): Promise<void>} taskCallback -
   *   Invoked each time a job fires; errors it raises are logged.
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    ranges.forEach((timeRange) => {
      const startText = timeRange != null && timeRange.start != null ? String(timeRange.start) : '';
      const [hourText, minuteText] = startText.split(':');
      // Explicit radix so that values such as "08" are always read as decimal.
      const hour = Number.parseInt(hourText, 10);
      const minute = Number.parseInt(minuteText, 10);
      const isValidTime = Number.isInteger(hour) && hour >= 0 && hour <= 23
        && Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('startScheduler: skipping time range with invalid start:', timeRange);
        return;
      }

      // Fires every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          // Record the failure; the job stays scheduled for the next day.
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('startScheduler: could not schedule time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Start the module automatically unless the configuration marks a debug run.
  if (!config.isDebug) {
    // CMD.init is async: attach a handler so that a failure while starting up
    // is logged instead of becoming an unhandled promise rejection.
    CMD.init().catch((error) => {
      console.error('CMD.init:', error);
    });
  }