PULL_DATA.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. const CMD = {}
  2. const redis_help = require('../src/use_redis');
  3. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  4. const config = require('../etc/config.json');
  5. const DailyRecordManager = require('./daily_records');
  6. const schedule = require('node-schedule');
  7. const manager = new DailyRecordManager();
  8. const rabbitMq = require('../src/mq/rabbit-mq');
  9. const helper = require('../src/helper');
  10. const PullDataService = require('../src/PullDataService');
  11. const tools = require('../tools');
  // Pull schedule currently in force; replaced whenever PullDataConfig is (re)loaded from Redis.
  let _24HourRanges = [];
  // Jobs created by CMD.startScheduler, stored as { job, timeRange } pairs.
  let jobs = [];
  // Per-day progress cache: key is the value of helper.getLocalDate(), value is an
  // object keyed by each time range's `start`.
  const cur_day_data = new Map();
  // Time range being pulled right now; stays null until the first scheduled task runs.
  let cur_timeRange = null;
  // Client used for the token, material-list and detail requests.
  const pullDataService = new PullDataService(redis_help);
  // Page index of the pull in progress; processTask sets it to -1 to stop paging.
  let cur_page = 0;
  /**
   * RabbitMQ handler for "pull config updated" notifications.
   *
   * Reloads `PullDataConfig` from Redis and restarts the scheduler with it.
   * The configuration is validated BEFORE the scheduler is touched: previously a
   * missing or non-array value was stored in `_24HourRanges` and
   * `CMD.startScheduler` cancelled every existing job before crashing on it,
   * which left the service without any schedule.
   *
   * @param {*} msg - The received message; its content is not used, it only triggers the reload.
   * @returns {Promise<void>}
   * @throws {Error} When the stored configuration is not a JSON array (the current
   *   schedule is left untouched in that case). Malformed JSON still surfaces as
   *   the SyntaxError raised by JSON.parse.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      throw new Error("PullDataConfig must be a JSON array of time ranges");
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Entry point of one scheduled pull: prepares today's progress record for the
   * given time range, persists it, makes the range the active one and starts paging.
   *
   * Note that the `timeRange` object itself is mutated (`today`, `finish_count`,
   * `count`) and becomes the module-level `cur_timeRange`.
   *
   * @param {{timeRange: object, executionTime: Date}} context - Range to pull and
   *   the time the job fired.
   * @returns {Promise<void>} Resolves after the first `processTask()` call settles;
   *   later pages are chained by processTask itself.
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Blank the cached progress of every day other than today.
    cur_day_data.forEach((unusedValue, dayKey) => {
      if (dayKey !== today) {
        cur_day_data.set(dayKey, {});
      }
    });
    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }

    // Create the slot for this range on first use; an existing slot is kept as is.
    const todayRecord = cur_day_data.get(today);
    if (!todayRecord[timeRange.start]) {
      todayRecord[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", todayRecord, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, cur_day_data.get(today));
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, cur_day_data.get(today));
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.today = today;
    cur_timeRange.finish_count = 0;
    // process_material_list adds each page's size to `count`; without this reset the
    // first addition is `undefined + n` (NaN) and later runs keep the previous total.
    cur_timeRange.count = 0;
    cur_page = 0;
    await processTask();
  };
  // Start the consumer.
  /**
   * Registers `messageHandler` as a direct-exchange consumer so configuration
   * updates are picked up. A failure to start is logged, not rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchangeName = "exchange_update_pull_config";
    const routingKey = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchangeName, routingKey);
    } catch (consumerError) {
      console.error('启动消费者失败:', consumerError);
    }
  }
  /**
   * Finds the first entry of `list` whose `materialId` loosely equals `materialId`.
   *
   * Loose equality (`==`) is kept on purpose so ids that differ only in type
   * (number vs. numeric string) still match, as they did before.
   * A missing / non-array `list` and null entries are tolerated instead of throwing.
   *
   * @param {Array<object>|null|undefined} list - Detail entries to search.
   * @param {*} materialId - Id to look for.
   * @returns {object|null} The first matching entry, or null when there is none.
   */
  function get_detail_by_materialId(list, materialId) {
    if (!Array.isArray(list)) {
      return null;
    }
    const match = list.find((item) => item != null && item.materialId == materialId);
    return match === undefined ? null : match;
  }
  /**
   * Joins the counters from the material list with the matching detail entries and
   * publishes the combined rows as one "on_recv_pull_data" message.
   *
   * Entries of `material_list` without a matching detail in `list` are left out.
   * The message is sent without waiting for the result.
   *
   * @param {Array<object>} list - Detail entries (looked up by materialId).
   * @param {Array<object>} material_list - Entries carrying the favorite/comment/like/share counters.
   * @returns {void}
   */
  function generate_pull_time_data(list, material_list) {
    const pull_rows = [];
    for (const stats of material_list) {
      const detail = get_detail_by_materialId(list, stats.materialId);
      if (detail == null) {
        continue;
      }
      pull_rows.push({
        awemeId: tools.getOriginVideoId(detail.awemeUrl),
        materialId: detail.materialId,
        awemeUrl: detail.awemeUrl,
        title: detail.title,
        publishTime: detail.publishTime,
        hasAnchorInfo: detail.hasAnchorInfo,
        landingUrl: detail.landingUrl,
        createTime: detail.createTime,
        updateTime: detail.updateTime,
        favoriteCount: stats.favoriteCount.count,
        commentCount: stats.commentCount.count,
        likeCount: stats.likeCount.count,
        shareCount: stats.shareCount.count
      });
    }
    const payload = { cmd: "on_recv_pull_data", data: pull_rows, today: cur_timeRange.today };
    sendPullDataMQMessage(JSON.stringify(payload));
  }
  /**
   * Fetches the details for one page of materials, stores the previously unknown
   * ones that carry anchor info, updates the progress counters and publishes the
   * page's pull data.
   *
   * Errors are logged and swallowed so that the paging loop in processTask goes on.
   *
   * @param {object|null} task_item - Result of process_material_list:
   *   `{list, id_list, materialId_list, materialId_data_list, count}`.
   *   `null`/`undefined` (process_material_list failed) is skipped explicitly
   *   instead of surfacing as a TypeError.
   * @returns {Promise<void>}
   */
  async function processDetailTask(task_item) {
    if (task_item == null) {
      console.log("processDetailTask: no task item, skipped");
      return;
    }
    try {
      console.log(" start processDetailTask");
      const { materialId_list, materialId_data_list } = task_item;

      const response = await pullDataService.get_detail(task_item.id_list);
      if (!response.success) {
        console.log("get_detail:", response);
        throw Object.assign(new Error("get_detail failed"), {
          msg: response,
          timeRange: cur_timeRange,
          fun: "get_detail",
          materialId_list: materialId_list
        });
      }
      // Validate before touching any counter so a malformed response leaves the
      // progress record unchanged.
      const detail_list = response.data ? response.data.list : undefined;
      if (!Array.isArray(detail_list)) {
        throw Object.assign(new Error("get_detail returned no list"), {
          msg: response,
          timeRange: cur_timeRange,
          fun: "get_detail",
          materialId_list: materialId_list
        });
      }

      if (cur_timeRange != null) {
        cur_timeRange.finish_count += materialId_list.length;
        const today = cur_timeRange.today;
        cur_day_data.get(today)[cur_timeRange.start].finish_count = cur_timeRange.finish_count;
        await manager.updateRecord(today, cur_day_data.get(today));
      }

      for (const materialId of materialId_list) {
        // Loose equality kept: ids may differ in type between the two services.
        const element = detail_list.find((item) => item.materialId == materialId);
        if (element === undefined || !element.hasAnchorInfo) {
          continue;
        }
        const info = materialId_data_list[element.materialId];
        await origin_data_controllers.createOriginData({
          video_id: tools.getOriginVideoId(element.awemeUrl),
          materialId: element.materialId,
          video_link: element.awemeUrl,
          title: CMD.subTitle(element.title),
          publish_time: new Date(element.publishTime),
          kepp_num: info.kepp_num,
          comment_num: info.comment_num,
          like_num: info.like_num,
          shared_num: info.shared_num,
          is_guajian: 1,
          guajian_link: element.landingUrl,
          status: 0,
          createTime_new: new Date(element.createTime),
          updateTime_new: new Date(element.updateTime),
          create_day: new Date(helper.getLocalDate())
        });
      }

      generate_pull_time_data(detail_list, task_item.list);
      console.log(" finish processDetailTask");
    } catch (error) {
      console.error("processDetailTask:", error);
    }
  }
  /**
   * Validates one page of the material list, records progress statistics for the
   * active time range and works out which materials are not stored yet.
   *
   * Changes compared with the previous version:
   *  - the progress record is now awaited (it was a floating promise whose
   *    rejection nobody handled); a failure to persist it is logged and does not
   *    drop the page;
   *  - `cur_timeRange.count` starts from 0 when it is missing or not a number,
   *    instead of turning into NaN;
   *  - the `FilterConfig` Redis read was removed because its result was never used;
   *  - the per-material counters are collected in a plain object, not an array.
   *
   * @param {object} response - Result of pullDataService.get_novel_material_list.
   * @param {number} page - Page index the response belongs to.
   * @returns {Promise<object|null>} `{list, id_list, materialId_list,
   *   materialId_data_list, count}` where `materialId_list` holds the ids reported
   *   as non-existent by origin_data_controllers; `null` when the response is
   *   unusable or anything fails (the reason is logged).
   */
  async function process_material_list(response, page) {
    try {
      if (!response.success || response.data == null) {
        console.log("materialId_list:", { msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
        return null;
      }
      const page_list = response.data.list;
      if (page_list == null || page_list.length <= 0) {
        console.log("materialId_list:", "没有数据了");
        return null;
      }

      if (cur_timeRange != null) { // statistics
        const today = cur_timeRange.today;
        const day_record = cur_day_data.get(today);
        const slot = day_record[cur_timeRange.start];
        if (slot) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", slot);
          cur_timeRange.count = (Number(cur_timeRange.count) || 0) + page_list.length;
          cur_timeRange.cur_page = page;
          slot.start_time = cur_timeRange.start_time;
          slot.pull_day = cur_timeRange.pull_day;
          slot.pull_time = cur_timeRange.pull_time || {};
          slot.count = response.data.count;
          slot.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        try {
          await manager.updateRecord(today, day_record);
        } catch (recordError) {
          // Statistics are secondary: keep processing the page.
          console.error("process_material_list: updateRecord failed:", recordError);
        }
      }

      const materialId_list = [];
      const materialId_data_list = {};
      for (const origin_element of page_list) {
        materialId_list.push(origin_element.materialId);
        materialId_data_list[origin_element.materialId] = {
          kepp_num: origin_element.favoriteCount.count,
          comment_num: origin_element.commentCount.count,
          like_num: origin_element.likeCount.count,
          shared_num: origin_element.shareCount.count
        };
      }

      const result = await origin_data_controllers.findNonExistentMaterialIds(materialId_list);
      return {
        list: page_list,
        id_list: materialId_list,
        materialId_list: result.data.nonExistentIds,
        materialId_data_list: materialId_data_list,
        count: page_list.length
      };
    } catch (e) {
      console.log("materialId_list:", e);
      return null;
    }
  }
  // Number of pages in a row that may fail with an unexpected error before the
  // current run is abandoned.
  const PROCESS_TASK_MAX_FAILED_PAGES = 5;
  // Consecutive failed pages of the run in progress (reset on page 0 and on success).
  let processTaskFailedPages = 0;

  /**
   * Pulls one page of the active time range (`cur_timeRange`, page `cur_page`,
   * page size 500), hands it to process_material_list / processDetailTask and
   * schedules the next page 300 ms later.
   *
   * Paging stops (`cur_page` is set to -1) when
   *  - the service reports failure or returns no (or an empty) list, or
   *  - PROCESS_TASK_MAX_FAILED_PAGES pages in a row failed with an unexpected
   *    error. Previously such errors (e.g. a network failure, or `success` without
   *    `data.list`) made the loop advance and retry forever.
   *
   * If a newer task replaced `cur_timeRange` while this page was in flight, this
   * chain ends without touching `cur_page`, so it cannot disturb the newer run.
   * The unused `FilterConfig` Redis read was removed.
   *
   * @returns {Promise<void>} Never rejects; errors are logged.
   */
  async function processTask() {
    const run_range = cur_timeRange;
    const page = cur_page;
    if (page === 0) {
      processTaskFailedPages = 0;
    }

    let stop_paging = false;
    try {
      const response = await pullDataService.get_novel_material_list(run_range, page, 500);
      const page_list = response && response.success && response.data ? response.data.list : undefined;
      if (!Array.isArray(page_list) || page_list.length <= 0) {
        // End of data or a failed request: finish this run.
        stop_paging = true;
        throw response;
      }
      const detail_item = await process_material_list(response, page, run_range);
      await processDetailTask(detail_item);
      processTaskFailedPages = 0;
      console.log("processTask over!");
    } catch (e) {
      console.error("PULL_DATA: error:", e);
      if (!stop_paging) {
        processTaskFailedPages += 1;
        if (processTaskFailedPages >= PROCESS_TASK_MAX_FAILED_PAGES) {
          console.error("PULL_DATA: too many consecutive failed pages, stop paging:", processTaskFailedPages);
          stop_paging = true;
        }
      }
    }

    if (cur_timeRange !== run_range) {
      console.log("PULL_DATA: a newer task took over, stop paging the previous one");
      return;
    }
    if (stop_paging) {
      cur_page = -1;
      return;
    }
    if (cur_page !== -1) {
      cur_page++;
      global.setTimeout(processTask, 300);
    }
  }
  /**
   * Limits a title to at most 299 UTF-16 code units.
   *
   * - Non-string input (null, undefined, ...) is returned unchanged instead of
   *   throwing on `.length`.
   * - When the cut would fall in the middle of a surrogate pair, the dangling high
   *   surrogate is dropped as well so the result never ends in half a character.
   *
   * @param {*} title - Title to shorten.
   * @returns {*} The title, shortened when it is a string longer than 299 units.
   */
  CMD.subTitle = function(title) {
    const MAX_UNITS = 299;
    if (typeof title !== "string" || title.length <= MAX_UNITS) {
      return title;
    }
    let end = MAX_UNITS;
    const lastKept = title.charCodeAt(end - 1);
    if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
      end -= 1;
    }
    return title.substring(0, end);
  };
  /**
   * Checks an element's counters against the configured minimums.
   *
   * The checks run in the order like, favorite, share, comment and stop at the
   * first counter that is below its threshold.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Element with likeCount/favoriteCount/shareCount/commentCount,
   *   each carrying a `count`.
   * @returns {boolean} false when any counter is below its threshold, true otherwise.
   */
  CMD.isRight = function(FilterConfig, origin_element) {
    const checks = [
      ["likeCount", "like_num"],
      ["favoriteCount", "kepp_num"],
      ["shareCount", "shared_num"],
      ["commentCount", "comment_num"],
    ];
    const belowThreshold = checks.some(
      ([counterKey, thresholdKey]) => origin_element[counterKey].count < FilterConfig[thresholdKey]
    );
    return !belowThreshold;
  };
  /**
   * Boots the puller once Redis is connected: starts the config-update consumer,
   * loads `PullDataConfig`, restores today's progress record, fetches a token and
   * starts the scheduler.
   *
   * Each step inside the connect callback is now guarded: the callback is async, so
   * any exception used to become an unhandled rejection and the scheduler was never
   * started. A configuration that is missing, malformed or not an array is logged
   * and leaves `_24HourRanges` as it was. A failed token request is logged too;
   * every scheduled job requests a token again before it runs.
   *
   * @returns {Promise<void>} Resolves as soon as the connect call has been issued;
   *   it does not wait for the callback.
   */
  CMD.init = async function() {
    redis_help.connect(async () => {
      // Not awaited: startConsumer logs its own failures.
      startConsumer();

      try {
        const parsedConfig = JSON.parse(await redis_help.getKeyValue("PullDataConfig"));
        if (Array.isArray(parsedConfig)) {
          _24HourRanges = parsedConfig;
        } else {
          console.error("PULL_DATA init: PullDataConfig is not an array:", parsedConfig);
        }
      } catch (configError) {
        console.error("PULL_DATA init: cannot load PullDataConfig:", configError);
      }

      try {
        // Restore today's progress so a restart does not lose it.
        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        if (record_res != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }
      } catch (recordError) {
        console.error("PULL_DATA init: cannot restore today's record:", recordError);
      }

      try {
        await pullDataService.getToken();
      } catch (tokenError) {
        console.error("PULL_DATA init: getToken failed:", tokenError);
      }

      try {
        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (schedulerError) {
        console.error("PULL_DATA init: cannot start scheduler:", schedulerError);
      }
    });
  };
  // Generate the time ranges covering 24 hours.
  /**
   * Builds the built-in table of 24 hourly time ranges.
   *
   * Every entry has `name` (period label), `start` ("HH:00"), `timestamp`
   * (1733760000000 plus one hour per entry), `hour` (0-23) and `timeFormat`
   * (a "12/10/2024, h:00:00 AM/PM" label). The values are fixed; nothing is
   * derived from the current date.
   *
   * NOTE(review): the entry for hour 20 starts at '20:45' while its timestamp and
   * timeFormat say 8:00 PM — kept exactly as it was; confirm whether that is intended.
   *
   * @returns {Array<object>} A new array with the 24 entries, ordered by hour.
   */
  CMD.generate24HourRanges = function() {
    const BASE_TIMESTAMP = 1733760000000;
    const HOUR_MS = 3600000;
    // [exclusive end hour, label] pairs, in ascending order.
    const periods = [
      [6, '凌晨区间'],
      [9, '早晨区间'],
      [12, '上午区间'],
      [14, '中午区间'],
      [18, '下午区间'],
      [22, '晚上区间'],
      [24, '深夜区间'],
    ];

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const [, name] = periods.find(([endHour]) => hour < endHour);
      const clockHour = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: name,
        start: hour === 20 ? '20:45' : `${String(hour).padStart(2, '0')}:00`,
        timestamp: BASE_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a usable job object are skipped: one such entry used to throw
   * here and, because startScheduler calls this first, blocked every later restart.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function() {
    for (const entry of jobs) {
      if (entry && entry.job && typeof entry.job.cancel === 'function') {
        entry.job.cancel();
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)creates one daily job per entry of `_24HourRanges`, firing at the entry's
   * `start` time ("HH:MM").
   *
   * All existing jobs are cancelled first. Each job refreshes the token and then
   * calls `taskCallback({ timeRange, executionTime })`; errors inside a job are
   * logged and do not propagate.
   *
   * Entries whose `start` does not parse to a valid hour (0-23) and minute (0-59),
   * and entries for which no job could be created, are logged and skipped rather
   * than stored. A non-array `_24HourRanges` is treated as "no ranges".
   *
   * @param {function(object): Promise<*>} taskCallback - Invoked by every job.
   * @returns {void}
   */
  CMD.startScheduler = function(taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    // Create one scheduled job per time point.
    ranges.forEach((timeRange) => {
      const parts = timeRange && typeof timeRange.start === 'string' ? timeRange.start.split(':') : [];
      const hour = parseInt(parts[0], 10);
      const minute = parseInt(parts[1], 10);
      const isValidTime =
        Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('PULL_DATA scheduler: invalid start time, range skipped:', timeRange);
        return;
      }

      // Runs every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await pullDataService.getToken();
          // Run the callback.
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          // Record the failure.
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('PULL_DATA scheduler: job could not be created, range skipped:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  /**
   * Publishes a message on the "exchange_pull_data_system" exchange.
   *
   * The routing key is passed along only when it is not null/undefined.
   * Failures are logged, not rethrown.
   *
   * @param {string} message - Message body to publish.
   * @param {string|null} [routingKey="on_recv_pull_data"] - Routing key; pass null to publish without one.
   * @returns {Promise<void>}
   */
  async function sendPullDataMQMessage(message, routingKey = "on_recv_pull_data") {
    const publishArgs = [message, "exchange_pull_data_system"];
    if (routingKey != null) {
      publishArgs.push(routingKey);
    }
    try {
      await rabbitMq.producerDirectMsg(...publishArgs);
      console.log('消息发送成功');
    } catch (sendError) {
      console.error('发送消息失败:', sendError);
    }
  }
  // Start automatically unless the configuration enables debug mode.
  // CMD.init is async: attach a handler so a failure while initiating the Redis
  // connection is reported instead of becoming an unhandled rejection.
  if (!config.isDebug) {
    CMD.init().catch((initError) => {
      console.error('PULL_DATA init failed:', initError);
    });
  }