// PULL_DATA.js (17 KB)
  // Namespace object; the functions below are attached to it as CMD.<name>.
  const CMD = {};
  // Millisecond constant (1000); not referenced anywhere in the visible code.
  const time_count = 1000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const axios = require('axios')
  6. const config = require('../etc/config.json');
  7. const DailyRecordManager = require('./daily_records');
  8. const schedule = require('node-schedule');
  9. const manager = new DailyRecordManager();
  10. const rabbitMq = require('../src/mq/rabbit-mq');
  11. const helper = require('../src/helper');
  12. const PullDataService = require('../src/PullDataService');
  // ---- Mutable module state shared by the scheduler and the pull loop ----

  // Time-range entries loaded from the Redis key "PullDataConfig";
  // CMD.startScheduler creates one scheduled job per entry.
  let _24HourRanges = [];
  // { job, timeRange } pairs created by CMD.startScheduler.
  let jobs = [];
  // Local date -> per-day progress object, itself keyed by a range's `start`.
  let cur_day_data = new Map();
  // Time range currently being pulled (null until the first task fires).
  let cur_timeRange = null;
  const pullDataService = new PullDataService(redis_help);
  // Page index of the running pull; processTask sets it to -1 to stop paging.
  let cur_page = 0;
  /**
   * RabbitMQ handler fired when the pull configuration changes.
   *
   * Re-reads the "PullDataConfig" key from Redis and rebuilds the scheduled
   * jobs from it. The message payload is not inspected; it only acts as a
   * trigger.
   *
   * The configuration is validated BEFORE the module state is touched:
   * CMD.startScheduler cancels every existing job first, so handing it a
   * null / non-array configuration would leave the process with no jobs at all.
   *
   * @param {*} msg - Incoming message (unused).
   * @returns {Promise<void>}
   * @throws {SyntaxError} When the stored value is not valid JSON.
   * @throws {TypeError} When the stored value is not a JSON array.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      // Still fail (as before), but keep the current schedule running.
      throw new TypeError('PullDataConfig must be a JSON array; keeping the current schedule');
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Scheduler callback: starts one pull run for a configured time range.
   *
   * Steps:
   *  1. Drops in-memory progress of every day other than today.
   *  2. Makes sure today's progress object has a slot for this range.
   *  3. Persists today's progress (update if a record exists, create otherwise).
   *  4. Resets the run state (`cur_timeRange`, its counters, `cur_page`) and
   *     starts the paging loop.
   *
   * @param {{timeRange: Object, executionTime: Date}} context
   *   `timeRange` is one entry of the pull configuration; this function reads
   *   its `name`, `start` and `interval_minute` and writes `finish_count` and
   *   `count` onto it.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Remove previous days entirely. Merely blanking them (set(k, {})) kept one
    // key per past day alive for the lifetime of the process.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }

    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }
    const dayRecord = cur_day_data.get(today);
    if (!dayRecord[timeRange.start]) {
      dayRecord[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", dayRecord, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, dayRecord);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, dayRecord);
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.finish_count = 0;
    // `count` is accumulated per page later on; without this reset the first
    // addition is `undefined + n` (NaN) and later runs keep adding to old totals.
    cur_timeRange.count = 0;
    cur_page = 0;
    await processTask();
  };
  /**
   * Starts the RabbitMQ consumer that listens for pull-configuration updates
   * and routes them to `messageHandler`. Start-up failures are logged, not
   * rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchangeName = "exchange_update_pull_config";
    const bindingKey = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchangeName, bindingKey);
    } catch (err) {
      console.error('启动消费者失败:', err);
    }
  }
  /**
   * Fetches the details of one batch of material ids and stores every item
   * that has anchor info as an origin-data row.
   *
   * Also advances the progress counter of the running time range and persists
   * today's progress record. All failures are logged and swallowed so the
   * paging loop that calls this function keeps going.
   *
   * @param {{materialId_list: Array, materialId_data_list: Object}} task_item
   *   `materialId_list` is passed to `pullDataService.get_detail`;
   *   `materialId_data_list` maps a materialId to its counters
   *   (`kepp_num`, `comment_num`, `like_num`, `shared_num`).
   * @returns {Promise<void>}
   */
  async function processDetailTask(task_item) {
    // Counters used when a returned item has no entry in materialId_data_list.
    const ZERO_COUNTERS = { kepp_num: 0, comment_num: 0, like_num: 0, shared_num: 0 };
    try {
      console.log(" start processDetailTask");
      const materialId_list = task_item.materialId_list;
      const materialId_data_list = task_item.materialId_data_list || {};

      const response = await pullDataService.get_detail(materialId_list);
      if (!response.success) {
        console.log("get_detail:", response);
        console.error("processDetailTask:", {
          msg: response,
          timeRange: cur_timeRange,
          fun: "get_detail",
          materialId_list: materialId_list
        });
        return;
      }

      const details = response.data && Array.isArray(response.data.list) ? response.data.list : [];

      if (cur_timeRange != null) {
        cur_timeRange.finish_count += details.length;
        const today = helper.getLocalDate();
        const dayRecord = cur_day_data.get(today);
        const slot = dayRecord ? dayRecord[cur_timeRange.start] : undefined;
        // The slot is created for the day the run STARTED on. If the local date
        // has changed since, there is nothing to update; skipping avoids the
        // TypeError that used to abort the whole batch.
        if (slot) {
          slot.finish_count = cur_timeRange.finish_count;
          await manager.updateRecord(today, dayRecord);
        }
      }

      for (const element of details) {
        if (!element.hasAnchorInfo) {
          continue;
        }
        const info = materialId_data_list[element.materialId] || ZERO_COUNTERS;
        await origin_data_controllers.createOriginData({
          video_id: element.awemeId,
          materialId: element.materialId,
          video_link: element.awemeUrl,
          title: CMD.subTitle(element.title),
          publish_time: new Date(element.publishTime),
          kepp_num: info.kepp_num,
          comment_num: info.comment_num,
          like_num: info.like_num,
          shared_num: info.shared_num,
          is_guajian: 1,
          guajian_link: element.landingUrl,
          status: 0,
          createTime_new: new Date(element.createTime),
          updateTime_new: new Date(element.updateTime)
        });
      }
      console.log(" finish processDetailTask");
    } catch (error) {
      console.error("processDetailTask:", error);
    }
  }
  /**
   * Processes one page returned by `get_novel_material_list`.
   *
   * - Records paging statistics for the running time range and persists them.
   * - Publishes the raw page to RabbitMQ (fire-and-forget).
   * - Returns the ids of the page that are not stored yet, together with a
   *   zero-initialised counter object per id.
   *
   * @param {Object} response - Service response; `success`, `data.list` and
   *   `data.count` are read.
   * @param {number} page - Page index the response belongs to.
   * @returns {Promise<{materialId_list: Array, materialId_data_list: Object, count: number}|null>}
   *   `null` when the response is unsuccessful, has no data, or anything fails
   *   (the reason is logged).
   */
  async function process_material_list(response, page) {
    // Single exit for every "nothing to return" case; same log line as before.
    const giveUp = (reason) => {
      console.log("materialId_list:", reason);
      return null;
    };

    try {
      if (!response.success || response.data == null) {
        return giveUp({ msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
      }
      const pageItems = response.data.list;
      if (pageItems == null || pageItems.length <= 0) {
        return giveUp("没有数据了");
      }

      if (cur_timeRange != null) { // statistics
        const today = helper.getLocalDate();
        // Undefined when the local date changed after the run started.
        const dayRecord = cur_day_data.get(today);
        const slot = dayRecord ? dayRecord[cur_timeRange.start] : undefined;
        if (slot) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", slot);
          // `count` may never have been initialised; treat that as 0 instead of
          // producing NaN.
          const previousCount = Number.isFinite(cur_timeRange.count) ? cur_timeRange.count : 0;
          cur_timeRange.count = previousCount + pageItems.length;
          cur_timeRange.cur_page = page;
          slot.start_time = cur_timeRange.start_time;
          slot.pull_day = cur_timeRange.pull_day;
          slot.pull_time = cur_timeRange.pull_time || {};
          slot.count = response.data.count;
          slot.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        if (dayRecord) {
          // Awaited so a rejection cannot become an unhandled rejection; a
          // failed statistics write must not discard the page itself.
          try {
            await manager.updateRecord(today, dayRecord);
          } catch (err) {
            console.error("process_material_list: updateRecord failed:", err);
          }
        }
      }

      // Intentionally not awaited: sendPullDataMQMessage handles its own errors.
      sendPullDataMQMessage(JSON.stringify({ cmd: "on_recv_pull_data", data: pageItems }));

      const materialId_list = [];
      // Plain object used as a dictionary keyed by materialId.
      const materialId_data_list = {};
      for (const item of pageItems) {
        materialId_list.push(item.materialId);
        materialId_data_list[item.materialId] = {
          kepp_num: 0,
          comment_num: 0,
          like_num: 0,
          shared_num: 0
        };
      }

      const result = await origin_data_controllers.findNonExistentMaterialIds(materialId_list);
      return {
        materialId_list: result.data.nonExistentIds,
        materialId_data_list: materialId_data_list,
        count: pageItems.length
      };
    } catch (e) {
      return giveUp(e);
    }
  }
  /**
   * One iteration of the paging loop for the running time range.
   *
   * Fetches page `cur_page` (500 items per page), hands it to
   * `process_material_list` and, when that yields new ids, to
   * `processDetailTask`. While `cur_page` is not -1 the function re-schedules
   * itself one second later for the next page.
   *
   * Paging stops (`cur_page = -1`) when the service reports failure, when the
   * response carries no list, or when the list is empty. An unexpected
   * exception is logged and the loop moves on to the next page.
   *
   * NOTE(review): nothing prevents a newly fired task from resetting
   * `cur_page` while a previous chain still has a timer pending — confirm
   * whether overlapping runs can occur with the configured ranges.
   *
   * @returns {Promise<void>}
   */
  async function processTask() {
    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, cur_page, 500);
      const pageItems = response && response.success && response.data ? response.data.list : null;
      if (!Array.isArray(pageItems) || pageItems.length <= 0) {
        // Covers failure, a malformed response and the regular end of data.
        // A malformed response used to raise a TypeError WITHOUT stopping,
        // which let the loop run forever.
        cur_page = -1;
        console.error("PULL_DATA: error:", response);
        return;
      }

      const detail_item = await process_material_list(response, cur_page, cur_timeRange);
      // null means the page could not be processed (already logged there).
      if (detail_item != null && detail_item.materialId_list.length > 0) {
        await processDetailTask(detail_item);
      }
      console.log("processTask over!");
    } catch (e) {
      console.error("PULL_DATA: error:", e);
    } finally {
      if (cur_page != -1) {
        cur_page++;
        global.setTimeout(processTask, 1000);
      }
    }
  }
  /**
   * Truncates a title to at most `maxLength` UTF-16 code units.
   *
   * @param {*} title - Title to shorten. `null`/`undefined` yield an empty
   *   string; other non-string values are converted with `String()`.
   * @param {number} [maxLength=299] - Maximum length of the result.
   * @returns {string} The title, cut to `maxLength` code units or one fewer
   *   when the cut would otherwise split a surrogate pair (e.g. an emoji).
   */
  CMD.subTitle = function(title, maxLength = 299) {
    if (title == null) {
      return '';
    }
    const text = typeof title === 'string' ? title : String(title);
    if (text.length <= maxLength) {
      return text;
    }
    let end = maxLength;
    const lastKept = text.charCodeAt(end - 1);
    // A high surrogate as the last kept unit means its low surrogate would be
    // cut off, leaving an invalid lone surrogate in the stored title.
    if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
      end -= 1;
    }
    return text.substring(0, end);
  };
  /**
   * Checks whether an item reaches every configured minimum.
   *
   * Each counter of `origin_element` is compared with its threshold in
   * `FilterConfig`:
   *   likeCount.count     >= like_num
   *   favoriteCount.count >= kepp_num
   *   shareCount.count    >= shared_num
   *   commentCount.count  >= comment_num
   *
   * A counter that is absent (or has no `count`) is treated as 0 instead of
   * raising a TypeError.
   *
   * @param {Object} FilterConfig - Thresholds (`like_num`, `kepp_num`,
   *   `shared_num`, `comment_num`).
   * @param {Object} origin_element - Item carrying the counter objects.
   * @returns {boolean} `false` as soon as one counter is below its threshold.
   */
  CMD.isRight = function(FilterConfig, origin_element) {
    // [counter field on the item, threshold field on the config]
    const checks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    for (const [counterKey, thresholdKey] of checks) {
      const counter = origin_element[counterKey];
      const value = counter != null && counter.count != null ? counter.count : 0;
      if (value < FilterConfig[thresholdKey]) {
        return false;
      }
    }
    return true;
  };
  /**
   * Boots the puller once Redis is connected:
   *  1. starts the configuration-update consumer,
   *  2. loads the pull configuration from the Redis key "PullDataConfig",
   *  3. restores today's progress record into memory (if one exists),
   *  4. fetches a service token,
   *  5. starts the scheduler.
   *
   * Everything runs inside the `redis_help.connect` callback, whose result
   * nobody awaits, so errors are caught and logged here instead of surfacing
   * as unhandled rejections.
   *
   * @returns {Promise<void>} Resolves right after `redis_help.connect` has been
   *   called; it does not wait for the callback to finish.
   */
  CMD.init = async function() {
    redis_help.connect(async () => {
      try {
        // Not awaited; startConsumer logs its own failures.
        startConsumer();

        const rawConfig = await redis_help.getKeyValue("PullDataConfig");
        const parsedConfig = JSON.parse(rawConfig);
        if (Array.isArray(parsedConfig)) {
          _24HourRanges = parsedConfig;
        } else {
          // Missing key (JSON.parse(null) === null) or wrong shape: keep the
          // current ranges rather than storing something forEach cannot walk.
          console.error('PullDataConfig is missing or not an array:', rawConfig);
        }

        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        const content = record_res != null ? record_res.content : null;
        // Only restore object content: the pull loop indexes into this value.
        if (content != null && typeof content === 'object' && !cur_day_data.has(key)) {
          cur_day_data.set(key, content);
        }

        // Each scheduled job fetches a token again before it runs, so a failure
        // here must not prevent the scheduler from starting.
        try {
          await pullDataService.getToken();
        } catch (tokenError) {
          console.error('PULL_DATA init: getToken failed:', tokenError);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error('PULL_DATA init failed:', error);
      }
    });
  };
  /**
   * Builds the default list of 24 hourly time ranges (00:00 … 23:00).
   *
   * Each entry has:
   *  - `name`       period label chosen by hour (see `periods` below),
   *  - `start`      "HH:00",
   *  - `timestamp`  a FIXED snapshot: 1733760000000 + hour * 3 600 000 ms
   *                 (1733760000000 is 2024-12-09T16:00:00Z, i.e. midnight of
   *                 2024-12-10 at UTC+8),
   *  - `hour`       0..23,
   *  - `timeFormat` the matching "12/10/2024, h:00:00 AM|PM" string.
   *
   * The values equal the previously hard-coded table, except that the hour-20
   * entry now starts at "20:00"; it used to say "20:45" while its `hour`,
   * `timestamp` and `timeFormat` all described 8:00 PM.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function() {
    const BASE_TIMESTAMP = 1733760000000;
    const HOUR_MS = 60 * 60 * 1000;
    // A period applies to every hour below `until` that no earlier period took.
    const periods = [
      { until: 6, name: '凌晨区间' },
      { until: 9, name: '早晨区间' },
      { until: 12, name: '上午区间' },
      { until: 14, name: '中午区间' },
      { until: 18, name: '下午区间' },
      { until: 22, name: '晚上区间' },
      { until: 24, name: '深夜区间' }
    ];

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const period = periods.find((candidate) => hour < candidate.until);
      const hour12 = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: period.name,
        start: `${String(hour).padStart(2, '0')}:00`,
        timestamp: BASE_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${hour12}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the job list.
   *
   * Entries without a usable job object are skipped: node-schedule's
   * `scheduleJob` returns `null` for a spec it cannot parse, and calling
   * `cancel()` on that would abort the loop and leave the remaining jobs
   * running.
   */
  CMD.stopScheduler = function() {
    for (const entry of jobs) {
      const job = entry ? entry.job : null;
      if (job && typeof job.cancel === 'function') {
        job.cancel();
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)creates one daily job per configured time range.
   *
   * Existing jobs are cancelled first. Each range's `start` ("HH:mm") becomes
   * the cron spec "<minute> <hour> * * *". When a job fires it refreshes the
   * service token and then calls `taskCallback({ timeRange, executionTime })`;
   * errors raised there are logged, never propagated.
   *
   * Entries whose `start` is missing or out of range (hour 0-23, minute 0-59)
   * are logged and skipped, so one bad entry cannot abort scheduling after all
   * previous jobs have already been cancelled.
   *
   * @param {function({timeRange: Object, executionTime: Date}): *} taskCallback
   *   Invoked each time a job fires.
   */
  CMD.startScheduler = function(taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    // Create one scheduled job per configured start time.
    ranges.forEach((timeRange) => {
      const parts = timeRange && typeof timeRange.start === 'string'
        ? timeRange.start.split(':')
        : [];
      const hour = Number.parseInt(parts[0], 10);
      const minute = Number.parseInt(parts[1], 10);
      const isValidTime = Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('Skipping time range with invalid start:', timeRange);
        return;
      }

      // Runs every day at hour:minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await pullDataService.getToken();
          // Run the callback.
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          // Record the failure.
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        // scheduleJob yields null when it rejects the spec.
        console.error('Could not schedule time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  /**
   * Publishes a message to the "exchange_pull_data_system" exchange.
   * Failures are logged, never rethrown.
   *
   * @param {string} message - Payload to publish.
   * @param {?string} [routingKey="on_recv_pull_data"] - Routing key; when
   *   `null` the producer is called without a routing-key argument.
   * @returns {Promise<void>}
   */
  async function sendPullDataMQMessage(message, routingKey = "on_recv_pull_data") {
    const producerArgs = [message, "exchange_pull_data_system"];
    if (routingKey != null) {
      producerArgs.push(routingKey);
    }
    try {
      await rabbitMq.producerDirectMsg(...producerArgs);
      console.log('消息发送成功');
    } catch (err) {
      console.error('发送消息失败:', err);
    }
  }
  // Start automatically unless the configuration marks this as a debug run.
  // CMD.init is async: attach a rejection handler so a start-up failure is
  // logged instead of surfacing as an unhandled promise rejection.
  if (!config.isDebug) {
    CMD.init().catch((error) => {
      console.error('PULL_DATA init failed:', error);
    });
  }