PULL_DATA.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  // Command table: the CMD.* functions defined in this file are attached to this object.
  const CMD = {};

  // Dependencies. The require order is kept as-is because loading a module may have side effects.
  const redis_help = require('../src/use_redis');
  const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  const config = require('../etc/config.json');
  const DailyRecordManager = require('./daily_records');
  const schedule = require('node-schedule');

  // Record store used through getRecord / createRecord / updateRecord, keyed by a date string.
  const manager = new DailyRecordManager();
  manager.init("daily_records");

  const rabbitMq = require('../src/mq/rabbit-mq');
  const helper = require('../src/helper');
  const PullDataService = require('../src/PullDataService');
  const { default: axios } = require('axios');

  // Time-range configs parsed from the Redis key "PullDataConfig"; one scheduled job is created per entry.
  let _24HourRanges = [];
  // `{ job, timeRange }` entries for the jobs created by CMD.startScheduler.
  let jobs = [];
  // Map: date string (helper.getLocalDate()) -> object keyed by timeRange.start holding that slot's progress.
  let cur_day_data = new Map();
  // Time range of the task currently being pulled; null until the first task runs.
  let cur_timeRange = null;
  // Client used to fetch the token and the material list pages.
  let pullDataService = new PullDataService(redis_help);
  // Page index of the next request; processTask sets it to -1 to stop polling.
  let cur_page = 0;
  /**
   * Message-queue handler for "pull config updated" notifications.
   *
   * Reloads the time ranges from the Redis key "PullDataConfig" and rebuilds the
   * schedule from them. The stored value is validated first: CMD.startScheduler
   * cancels all running jobs before it iterates the ranges, so installing a missing
   * or malformed config would leave the process with no schedule at all.
   *
   * @param {*} msg - The received message; its content is not inspected.
   * @returns {Promise<void>}
   * @throws Propagates errors from the Redis read or from JSON.parse (the schedule
   *         is untouched in that case).
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const ranges = JSON.parse(rawConfig);
    if (!Array.isArray(ranges)) {
      // Keep the jobs that are already running rather than wiping them.
      console.error('PullDataConfig is missing or not an array, keeping the current schedule:', ranges);
      return;
    }
    _24HourRanges = ranges;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Runs one scheduled pull task.
   *
   * Prepares the in-memory progress entry for today's slot (keyed by
   * `timeRange.start`), persists it through `manager` (update when a record for
   * today exists, create otherwise), makes `timeRange` the active range, resets
   * the paging state and starts `processTask`.
   *
   * @param {{timeRange: object, executionTime: Date}} context - The time range to
   *        pull (reads `name`, `start`, `interval_minute`) and the trigger time.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Entries of other days are reset to empty objects (their keys are kept).
    cur_day_data.forEach((_unused, day) => {
      if (day != today) {
        cur_day_data.set(day, {});
      }
    });
    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }

    const todayData = cur_day_data.get(today);
    if (!todayData[timeRange.start]) {
      todayData[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", todayData, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, todayData);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, todayData);
      console.log("创建新记录结果:", result);
    }

    // The range object itself becomes the active range and carries the run state.
    cur_timeRange = timeRange;
    cur_timeRange.today = today;
    cur_timeRange.finish_count = 0;
    // process_material_list accumulates into `count`; without this reset the first
    // addition is `undefined + n`, i.e. NaN.
    cur_timeRange.count = 0;
    cur_page = 0;
    await processTask();
  };
  /**
   * Starts the consumer: registers `messageHandler` through
   * `rabbitMq.consumerDirectMsg` on the exchange "exchange_update_pull_config"
   * with the routing key "updatePullConig". A failure is logged, not rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchange = "exchange_update_pull_config";
    // NOTE(review): "Conig" looks like a typo, but the key has to match whatever the
    // publisher uses — confirm before changing it.
    const routingKey = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchange, routingKey);
    } catch (error) {
      console.error('启动消费者失败:', error);
    }
  }
  /**
   * Finds the first entry of `list` whose `materialId` matches `materialId`.
   *
   * The comparison is deliberately loose (`==`), so an id stored as a number
   * matches the same id given as a string.
   *
   * @param {Array<object>} list - Entries to search; a non-array yields null.
   * @param {string|number} materialId - Id to look for.
   * @returns {object|null} The first matching entry, or null when there is none.
   */
  function get_detail_by_materialId(list, materialId) {
    if (!Array.isArray(list)) {
      return null;
    }
    // Null/undefined entries are skipped instead of throwing on property access.
    const match = list.find((entry) => entry != null && entry.materialId == materialId);
    return match === undefined ? null : match;
  }
  /**
   * Joins the counters of `material_list` with the matching detail entries of
   * `list` (matched on `materialId`) and publishes the result as an
   * "on_recv_pull_data" message.
   *
   * Materials without a matching detail entry are left out. A missing counter
   * object is reported as 0 instead of aborting the whole batch.
   *
   * @param {Array<object>} list - Detail entries (awemeId, awemeUrl, title, ...).
   * @param {Array<object>} material_list - Materials carrying
   *        favoriteCount/commentCount/likeCount/shareCount objects with a `count`.
   * @returns {Promise<void>} Settles once the send attempt has finished.
   */
  function generate_pull_time_data(list, material_list) {
    const details = Array.isArray(list) ? list : [];
    const materials = Array.isArray(material_list) ? material_list : [];
    const counterValue = (counter) =>
      counter != null && counter.count != null ? counter.count : 0;

    const pull_data = [];
    for (const material of materials) {
      if (material == null) {
        continue;
      }
      const info = get_detail_by_materialId(details, material.materialId);
      if (info == null) {
        continue;
      }
      pull_data.push({
        awemeId: info.awemeId,
        materialId: info.materialId,
        awemeUrl: info.awemeUrl,
        title: info.title,
        publishTime: info.publishTime,
        hasAnchorInfo: info.hasAnchorInfo,
        landingUrl: info.landingUrl,
        createTime: info.createTime,
        updateTime: info.updateTime,
        favoriteCount: counterValue(material.favoriteCount),
        commentCount: counterValue(material.commentCount),
        likeCount: counterValue(material.likeCount),
        shareCount: counterValue(material.shareCount)
      });
    }

    // cur_timeRange is null until the first task has run; `today` is then omitted.
    const today = cur_timeRange != null ? cur_timeRange.today : undefined;
    return sendPullDataMQMessage(
      JSON.stringify({ cmd: "on_recv_pull_data", data: pull_data, today: today })
    );
  }
  /**
   * Handles one fetched page: records progress for the active time range and
   * publishes the page's liked materials as an "on_recv_pull_data" message.
   *
   * Progress is counted from `materialId_data_list`, the list that is actually
   * processed here. `materialId_list` is always empty in the object built by
   * process_material_list, so counting it would leave `finish_count` at 0.
   *
   * All errors are logged and swallowed; this function never rejects.
   *
   * @param {{materialId_data_list: Array<object>}|null} task_item - Page data as
   *        returned by process_material_list; null/undefined is ignored.
   * @returns {Promise<void>}
   */
  async function processDetailTask(task_item) {
    try {
      console.log(" start processDetailTask")
      if (task_item == null) {
        console.error("processDetailTask:", "no task item to process");
        return;
      }
      const pageItems = Array.isArray(task_item.materialId_data_list)
        ? task_item.materialId_data_list
        : [];

      // Captured up front: cur_timeRange can be replaced by a new task while we await.
      let today;
      if (cur_timeRange != null) {
        today = cur_timeRange.today;
        cur_timeRange.finish_count = (cur_timeRange.finish_count || 0) + pageItems.length;
        try {
          const dayData = cur_day_data.get(today);
          const slot = dayData ? dayData[cur_timeRange.start] : undefined;
          if (slot) {
            slot.finish_count = cur_timeRange.finish_count;
            await manager.updateRecord(today, dayData);
          }
        } catch (error) {
          // A failed progress write must not drop the page's data.
          console.error("processDetailTask: failed to record progress:", error);
        }
      }

      // Only materials with at least one like are forwarded.
      const likedItems = pageItems.filter(
        (item) => item != null && item.likeCount != null && item.likeCount.count > 0
      );

      await sendPullDataMQMessage(
        JSON.stringify({ cmd: "on_recv_pull_data", data: likedItems, today: today })
      );
      console.log(" finish processDetailTask")
    } catch (error) {
      console.error("processDetailTask:", error)
    }
  }
  /**
   * Validates one page returned by the material-list request and records the
   * paging statistics of the active time range.
   *
   * Statistics go into the slot `cur_day_data.get(today)[cur_timeRange.start]`
   * and are persisted with `manager.updateRecord`. A failure while recording
   * them is logged and does not discard the page.
   *
   * @param {{success: boolean, data?: {list?: Array<object>, count?: number}}} response
   *        Response of the material-list request.
   * @param {number} page - Index of the page that produced `response`.
   * @returns {Promise<{list: Array<object>, id_list: Array, materialId_list: Array,
   *          materialId_data_list: Array<object>, count: number}|null>}
   *          The page data, or null when the response failed or holds no items.
   */
  async function process_material_list(response, page) {
    // Same log line as before for every rejected response.
    const reject = (reason) => {
      console.log("materialId_list:", reason);
      return null;
    };

    if (!response || !response.success || response.data == null) {
      return reject({ msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
    }
    const list = response.data.list;
    if (!Array.isArray(list) || list.length <= 0) {
      return reject("没有数据了");
    }

    if (cur_timeRange != null) { // statistics
      try {
        const today = cur_timeRange.today;
        const dayData = cur_day_data.get(today);
        const slot = dayData ? dayData[cur_timeRange.start] : undefined;
        if (slot) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", slot);
          // `count` may not have been initialised yet; treat that as 0 instead of NaN.
          cur_timeRange.count = (cur_timeRange.count || 0) + list.length;
          cur_timeRange.cur_page = page;
          slot.start_time = cur_timeRange.start_time;
          slot.pull_day = cur_timeRange.pull_day;
          slot.pull_time = cur_timeRange.pull_time || {};
          slot.count = response.data.count;
          slot.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        if (dayData) {
          // Awaited so a rejected write is handled here rather than left floating.
          await manager.updateRecord(today, dayData);
        }
      } catch (error) {
        console.error("process_material_list: failed to record progress:", error);
      }
    }

    return {
      list: list,
      id_list: [],
      materialId_list: [],
      materialId_data_list: list,
      count: list.length
    };
  }
  /**
   * Pulls one page (500 items) of the active time range, hands it to
   * process_material_list / processDetailTask and, unless polling has been
   * stopped, schedules itself for the next page one second later.
   *
   * Polling stops (`cur_page = -1`) when the request reports failure or when the
   * response carries no list items. A successful response without a
   * `data.list` array is treated as "no more data": it would otherwise raise a
   * TypeError that leaves `cur_page` untouched and keeps the loop running forever.
   *
   * NOTE(review): an exception thrown by the request itself still skips the page
   * and keeps polling, with no retry cap — confirm whether that is intended.
   *
   * @returns {Promise<void>} Never rejects; errors are logged.
   */
  async function processTask() {
    // Captured before awaiting: cur_page is module state and may change meanwhile.
    const page = cur_page;
    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, page, 500);
      if (!response || !response.success) {
        cur_page = -1;
        console.error("PULL_DATA: error:", response);
        return;
      }

      const list = response.data ? response.data.list : undefined;
      if (!Array.isArray(list) || list.length <= 0) {
        cur_page = -1;
        console.log("PULL_DATA: no more data, stopped at page", page);
        return;
      }

      const detail_item = await process_material_list(response, page, cur_timeRange);
      if (detail_item != null) {
        await processDetailTask(detail_item);
      } else {
        console.error("PULL_DATA: error:", "page " + page + " could not be processed");
      }
      console.log("processTask over!")
    } catch (e) {
      console.error("PULL_DATA: error:", e)
    } finally {
      // Runs after the early returns above as well; -1 means "stop polling".
      if (cur_page != -1) {
        cur_page++;
        global.setTimeout(processTask, 1000)
      }
    }
  }
  /**
   * Limits a title to its first 299 UTF-16 code units.
   *
   * @param {string} title - Title to limit.
   * @returns {string} `title` unchanged when its length is at most 299, otherwise
   *          the result of `title.substring(0, 299)`.
   */
  CMD.subTitle = function(title){
    const MAX_TITLE_LENGTH = 299;
    return title.length > MAX_TITLE_LENGTH
      ? title.substring(0, MAX_TITLE_LENGTH)
      : title;
  }
  /**
   * Tells whether a material reaches every threshold of the filter config.
   *
   * Each counter's `count` is compared with its threshold, in this order:
   * likeCount/like_num, favoriteCount/kepp_num, shareCount/shared_num,
   * commentCount/comment_num. Evaluation stops at the first counter that is
   * below its threshold.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Material with the four counter objects.
   * @returns {boolean} false when any counter is below its threshold, otherwise true.
   */
  CMD.isRight = function(FilterConfig,origin_element){
    const counterToThreshold = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    const isBelowSomeThreshold = counterToThreshold.some(
      ([counter, threshold]) => origin_element[counter].count < FilterConfig[threshold]
    );
    return !isBelowSomeThreshold;
  }
  /**
   * Boots the puller once Redis is connected: starts the config-update consumer,
   * loads the time ranges from the Redis key "PullDataConfig", restores today's
   * progress record into `cur_day_data`, fetches a token and starts the scheduler.
   *
   * The work runs inside the callback handed to `redis_help.connect`, so the
   * returned promise resolves before that work has finished. Failures inside the
   * callback are logged here; nothing else observes the callback's promise.
   *
   * @returns {Promise<void>}
   */
  CMD.init = async function(){
    redis_help.connect(async () => {
      try {
        startConsumer();

        const rawConfig = await redis_help.getKeyValue("PullDataConfig");
        const ranges = JSON.parse(rawConfig);
        if (Array.isArray(ranges)) {
          _24HourRanges = ranges;
        } else {
          // CMD.startScheduler iterates the ranges; null or a non-array would crash it.
          console.error('PullDataConfig is missing or not an array, keeping the current time ranges:', ranges);
        }

        // Restore today's progress, unless something is already cached for today.
        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        if (record_res != null && record_res.content != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }

        try {
          await pullDataService.getToken();
        } catch (error) {
          // Each scheduled job fetches its own token, so this must not block scheduling.
          console.error('PULL_DATA init: getToken failed:', error);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error('PULL_DATA init failed:', error);
      }
    })
  }
  /**
   * Builds the table of 24 hourly time ranges.
   *
   * Every entry has `name` (period label), `start` ("HH:MM"), `timestamp`
   * (milliseconds), `hour` (0-23) and `timeFormat` (display string). A new array
   * of new objects is returned on each call.
   *
   * NOTE(review): timestamps and display strings are frozen to 12/10/2024, and
   * the hour-20 entry starts at '20:45' although its timestamp and display
   * string say 8:00 PM. Both are kept exactly as they were — confirm whether
   * they are intentional.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function() {
    const HOUR_MS = 3600000;
    // Timestamp of the '00:00' entry; each following entry is one hour later.
    const FIRST_TIMESTAMP = 1733760000000;
    // Period label for every hour below `before`, checked in order.
    const periods = [
      { before: 6, name: '凌晨区间' },
      { before: 9, name: '早晨区间' },
      { before: 12, name: '上午区间' },
      { before: 14, name: '中午区间' },
      { before: 18, name: '下午区间' },
      { before: 22, name: '晚上区间' },
      { before: 24, name: '深夜区间' }
    ];
    // Entries whose start is not on the full hour.
    const startOverrides = { 20: '20:45' };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const period = periods.find((candidate) => hour < candidate.before);
      const fullHourStart = `${hour < 10 ? '0' : ''}${hour}:00`;
      const clockHour = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: period.name,
        start: startOverrides[hour] || fullHourStart,
        timestamp: FIRST_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return ranges;
  }
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a usable job are skipped, so one bad entry (for example a
   * null job stored for an invalid schedule spec) cannot abort the stop.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function() {
    jobs.forEach((entry) => {
      const job = entry != null ? entry.job : null;
      if (job != null && typeof job.cancel === 'function') {
        job.cancel();
      }
    });
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  }
  /**
   * Replaces the current schedule: cancels the existing jobs, then creates one
   * daily job per entry of `_24HourRanges`, firing at the entry's `start`
   * ("HH:MM").
   *
   * Each job fetches a token and then calls `taskCallback` with
   * `{ timeRange, executionTime }`; errors raised by a run are logged. Entries
   * whose `start` is not a valid time, or for which no job could be created,
   * are skipped and never stored in `jobs`.
   *
   * @param {(context: {timeRange: object, executionTime: Date}) => Promise<void>} taskCallback
   *        Function run by every job.
   * @returns {void}
   */
  CMD.startScheduler = function(taskCallback) {
    CMD.stopScheduler()

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    // Create one scheduled job per time point.
    ranges.forEach(timeRange => {
      const parts = timeRange != null && typeof timeRange.start === 'string'
        ? timeRange.start.split(':')
        : [];
      const hour = Number.parseInt(parts[0], 10);
      const minute = Number.parseInt(parts[1], 10);
      const isValidTime =
        Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('Skipping time range with an invalid start:', timeRange);
        return;
      }

      // Cron spec "minute hour * * *": every day at the configured time.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await pullDataService.getToken()
          // Run the task callback.
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          // Log the failure.
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        // Storing a null job would make the next stopScheduler call throw.
        console.error('Could not schedule time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  }
  /**
   * Publishes `message` on the exchange "exchange_pull_data_system" through
   * `rabbitMq.producerDirectMsg`. The routing key is passed along only when it
   * is neither null nor undefined. Success and failure are logged; a failure is
   * not rethrown.
   *
   * @param {string} message - Serialized payload to publish.
   * @param {string|null} [routingKey="on_recv_pull_data"] - Routing key; pass
   *        null to call the producer without one.
   * @returns {Promise<void>}
   */
  async function sendPullDataMQMessage(message,routingKey = "on_recv_pull_data") {
    const exchange = "exchange_pull_data_system";
    const producerArgs = routingKey != null
      ? [message, exchange, routingKey]
      : [message, exchange];
    try {
      await rabbitMq.producerDirectMsg(...producerArgs);
      console.log('消息发送成功');
    } catch (error) {
      console.error('发送消息失败:', error);
    }
  }
  /**
   * Manual debugging helper: posts the configured credentials to the token
   * endpoint of `config.pull_data_config.debug_host` and logs the response body.
   *
   * The HTTP session is a local variable. Assigning it to `this` wrote to the
   * global object in sloppy mode and throws in strict mode.
   *
   * NOTE(review): the request goes to `debug_host` but sends `release_password`
   * — confirm that this pairing is intended.
   *
   * @returns {Promise<void>}
   * @throws Propagates request errors from axios.
   */
  async function test() {
    const session = axios.create({
      baseURL: config.pull_data_config.debug_host,
      headers: {
        'Accept': 'application/json, text/plain, */*',
      }
    });
    const response = await session.post(config.pull_data_config.get_token, {
      userName: config.pull_data_config.userName,
      verCode: config.pull_data_config.verCode,
      password: config.pull_data_config.release_password,
      loginType: config.pull_data_config.loginType
    });
    // Only the body is logged: the full response object also carries the request
    // config, which includes the password sent above.
    console.log("response:", response.data)
  }
  // Start pulling automatically unless the config marks this as a debug run.
  if (!config.isDebug) {
    // CMD.init is async: without a handler a failure would be an unhandled rejection.
    CMD.init().catch((error) => {
      console.error('PULL_DATA init failed:', error);
    });
  }