// PULL_DATA.js (17 KB)
  /** Namespace object; the pull-data commands below are attached to it as properties. */
  const CMD = {};

  /** Delay, in milliseconds, before `processTask` is scheduled again for the next page. */
  const time_count = 5 * 1000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const axios = require('axios')
  6. const config = require('../etc/config.json');
  7. const DailyRecordManager = require('./daily_records');
  8. const schedule = require('node-schedule');
  9. const manager = new DailyRecordManager();
  10. const rabbitMq = require('../src/mq/rabbit-mq');
  // Mutable module state shared by the scheduler callbacks and the CMD helpers.

  /** HTTP client; assigned by CMD.init via axios.create. */
  let session = null;

  /** Page index passed to CMD.get_novel_material_list; -1 marks "paging stopped". */
  let cur_page = 0;

  /** Time ranges parsed from the "PullDataConfig" redis value; one job is scheduled per entry. */
  let _24HourRanges = [];

  /** `{ job, timeRange }` entries for the jobs created by CMD.startScheduler. */
  let jobs = [];

  /** Per-day bookkeeping, keyed by the `YYYY-MM-DD` prefix of `Date#toISOString()`. */
  let cur_day_data = new Map();

  /** Time range of the most recently triggered task (null until a task has run). */
  let cur_timeRange = null;
  /**
   * Message handler registered by `startConsumer`: reloads the "PullDataConfig"
   * value from redis and restarts the scheduler with it.
   *
   * The message payload itself is not inspected; it only acts as a trigger.
   *
   * The parsed config is validated BEFORE any state is touched, because
   * `CMD.startScheduler` cancels every existing job first and then iterates the
   * ranges with `forEach`. Applying a missing or non-array config would
   * therefore wipe the running schedule and then throw.
   *
   * @param {*} msg - the delivered message (unused).
   * @returns {Promise<void>}
   * @throws {SyntaxError} when the stored value is not valid JSON.
   * @throws {TypeError} when the stored value does not parse to an array.
   */
  const messageHandler = async (msg) => {
    const rawConfig = await redis_help.getKeyValue("PullDataConfig");
    const parsedConfig = JSON.parse(rawConfig);
    if (!Array.isArray(parsedConfig)) {
      // Keep the current schedule untouched; the caller sees the failure as a rejection.
      throw new TypeError('PullDataConfig must be a JSON array of time ranges');
    }
    _24HourRanges = parsedConfig;
    CMD.startScheduler(taskCallback);
  };
  /**
   * Callback run by each scheduled job.
   *
   * Steps:
   *  1. Drops bookkeeping for every day other than today.
   *  2. Makes sure today's slot map has an entry for `timeRange.start`.
   *  3. Persists today's slot map through `manager` (update if a record exists,
   *     create otherwise).
   *  4. Resets paging (`cur_page = 0`), remembers the range in `cur_timeRange`
   *     and starts `processTask`.
   *
   * The slot map is a plain object keyed by `timeRange.start` (e.g. "20:45").
   * It used to be an Array carrying those string keys, and `JSON.stringify`
   * ignores non-index properties of arrays, so such a value serializes as `[]`.
   * NOTE(review): this assumes `manager` serializes the content as JSON — confirm.
   *
   * NOTE(review): `today` is the UTC date (`toISOString`), while the jobs are
   * scheduled from "HH:mm" strings — confirm the day boundary is the intended one.
   *
   * @param {{timeRange: {name: string, start: string, interval_minute: number}, executionTime: Date}} context
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = new Date().toISOString().slice(0, 10);

    // Remove stale days instead of resetting them to empty values, so the map
    // does not grow by one key per day for the lifetime of the process.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }

    let slots = cur_day_data.get(today);
    if (slots === null || typeof slots !== 'object' || Array.isArray(slots)) {
      // Start fresh, or migrate a legacy array (keeping whatever keys it carries).
      slots = Array.isArray(slots) ? Object.assign({}, slots) : {};
      cur_day_data.set(today, slots);
    }
    if (!slots[timeRange.start]) {
      slots[timeRange.start] = { name: timeRange.name, interval_minute: timeRange.interval_minute };
    }
    console.log("cur_day_data[today]:", slots, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, slots);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, slots);
      console.log("创建新记录结果:", result);
    }

    cur_page = 0;
    cur_timeRange = timeRange;
    await processTask(timeRange);
  };
  // Start the consumer.
  /**
   * Registers `messageHandler` through `rabbitMq.consumerDirectMsg`.
   * Any failure while doing so is logged and swallowed, so this never rejects.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    // Presumably the exchange name and the routing key — confirm against rabbit-mq.js.
    const exchange = "exchange_update_pull_config";
    const routeKey = "updatePullConig";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchange, routeKey);
    } catch (error) {
      console.error('启动消费者失败:', error);
    }
  }
  /** Number of consecutive failed pages after which a pull run gives up. */
  const PULL_MAX_FAILURE_STREAK = 5;

  /** Consecutive failed `processTask` invocations; reset by any successful one. */
  let pull_failure_streak = 0;

  /** Maps a material-list element's counters to the origin-data field names. */
  function toCountFields(origin_element) {
    return {
      kepp_num: origin_element.favoriteCount.count,
      comment_num: origin_element.commentCount.count,
      like_num: origin_element.likeCount.count,
      shared_num: origin_element.shareCount.count
    };
  }

  /**
   * Pulls one page (`cur_page`) of the material list and syncs it into origin data.
   *
   * For each listed material:
   *  - if it is already stored, its counters are updated;
   *  - otherwise, if it passes `CMD.isRight(FilterConfig, …)`, its details are
   *    fetched with `CMD.get_detail`, and a new row is created for every detail
   *    that has `hasAnchorInfo` set.
   *
   * Paging: unless paging has stopped (`cur_page === -1`), the function
   * increments `cur_page` and re-schedules itself after `time_count` ms.
   * The timer calls it WITHOUT arguments, so only the first page of a run
   * (the one called with `timeRange`) records the total count in today's slot.
   *
   * Failure handling: an error on a page is logged and paging moves on to the
   * next page. After PULL_MAX_FAILURE_STREAK consecutive failures the run
   * stops, so a persistent outage no longer retries forever.
   *
   * NOTE(review): `taskCallback` resets `cur_page` while a previous run may
   * still be paging; the two runs then share `cur_page`. Not addressed here.
   *
   * @param {{start: string}|null|undefined} timeRange - range that triggered the run.
   * @returns {Promise<void>} never rejects; errors are logged.
   */
  async function processTask(timeRange) {
    let failed = false;
    try {
      if (cur_page === -1) {
        // Paging was already stopped (e.g. by another invocation).
        console.log("PULL_DATA:", "没有数据了");
        return;
      }

      // The login body is read as `result.token` here and as `data.token` in
      // CMD.init; accept either shape — TODO confirm the real response format.
      const login = await CMD.getToken();
      const token = (login.result && login.result.token) || (login.data && login.data.token);
      if (!token) {
        throw new Error("login response does not contain a token");
      }
      await redis_help.setKeyValue("pull_data_token", token);

      const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));

      const listResponse = await CMD.get_novel_material_list(cur_page);
      const pageItems = listResponse.data.list;
      if (!Array.isArray(pageItems) || pageItems.length === 0) {
        // Normal end of a run: no exception needed for control flow.
        console.log("PULL_DATA:", "没有数据了");
        cur_page = -1;
        return;
      }

      if (timeRange != null) {
        const today = new Date().toISOString().slice(0, 10);
        console.log("response.data:", listResponse.data, today);
        // The slot map can be missing if the (UTC) day changed since taskCallback ran.
        const slots = cur_day_data.get(today);
        const slot = slots && slots[timeRange.start];
        if (slot) {
          console.log("response.data.count:", listResponse.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", slot);
          slot.count = listResponse.data.count;
        } else {
          console.log("cur_day_data空");
        }
        if (slots) {
          await manager.updateRecord(today, slots);
        }
      }

      const pendingIds = [];
      // Keys are stringified so that 123 and "123" address the same material,
      // as they did when an array/object property was used for the lookup.
      const pendingCounts = new Map();
      for (const origin_element of pageItems) {
        const found = await origin_data_controllers.getOriginData({
          materialId: origin_element.materialId
        });
        if (found.success) {
          await origin_data_controllers.updateOriginData(
            { id: found.data.id },
            toCountFields(origin_element)
          );
        } else if (CMD.isRight(FilterConfig, origin_element)) {
          pendingIds.push(origin_element.materialId);
          pendingCounts.set(String(origin_element.materialId), toCountFields(origin_element));
        }
      }

      // Nothing new on this page: skip the detail request entirely.
      if (pendingIds.length > 0) {
        const detailResponse = await CMD.get_detail(pendingIds);
        const detailList = detailResponse.data.list;
        const details = Array.isArray(detailList) ? detailList : [];
        for (const element of details) {
          if (!element.hasAnchorInfo) {
            continue;
          }
          const info = pendingCounts.get(String(element.materialId));
          if (!info) {
            // A detail we did not ask for must not abort the rest of the page.
            console.error("PULL_DATA: detail for unrequested material skipped:", element.materialId);
            continue;
          }
          await origin_data_controllers.createOriginData({
            video_id: element.awemeId,
            materialId: element.materialId,
            video_link: element.awemeUrl,
            title: CMD.subTitle(element.title),
            publish_time: new Date(element.publishTime),
            kepp_num: info.kepp_num,
            comment_num: info.comment_num,
            like_num: info.like_num,
            shared_num: info.shared_num,
            is_guajian: 1,
            guajian_link: element.landingUrl,
            status: 0
          });
        }
      }
    } catch (e) {
      failed = true;
      console.error("PULL_DATA: error:", e);
    } finally {
      pull_failure_streak = failed ? pull_failure_streak + 1 : 0;
      if (cur_page !== -1 && pull_failure_streak >= PULL_MAX_FAILURE_STREAK) {
        console.error("PULL_DATA: giving up after consecutive failures:", pull_failure_streak);
        pull_failure_streak = 0;
        cur_page = -1;
      }
      if (cur_page !== -1) {
        cur_page++;
        global.setTimeout(processTask, time_count);
      }
    }
  }
  /**
   * Truncates a title to at most `maxLength` UTF-16 code units.
   *
   * @param {*} title - the title; `null`/`undefined` yield an empty string,
   *   other non-string values are converted with `String()`.
   * @param {number} [maxLength=299] - maximum length of the returned string.
   * @returns {string} the title, cut to `maxLength` when longer.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (title == null) {
      // A missing title used to throw on `.length`.
      return "";
    }
    const text = String(title);
    return text.length > maxLength ? text.substring(0, maxLength) : text;
  };
  /**
   * Tells whether a material-list element reaches every threshold in the filter.
   *
   * Counters are compared in the order like, favorite, share, comment, and
   * evaluation stops at the first counter that is below its threshold.
   *
   * @param {{like_num: number, kepp_num: number, shared_num: number, comment_num: number}} FilterConfig
   * @param {object} origin_element - element carrying `likeCount`, `favoriteCount`,
   *   `shareCount` and `commentCount`, each with a `count`.
   * @returns {boolean} false as soon as one counter is below its threshold, true otherwise.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    // [counter on the element, threshold key in the filter]
    const rules = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    // `!(a < b)` rather than `a >= b`: a missing threshold must not reject the element.
    return rules.every(
      ([counter, threshold]) => !(origin_element[counter].count < FilterConfig[threshold])
    );
  };
  /**
   * Logs in against the `get_token` endpoint from the pull-data config.
   *
   * Posts the configured user name and password together with the fixed
   * `verCode` "1234" and `loginType` "OPENAPI".
   *
   * @returns {Promise<*>} the body (`data`) of the HTTP response.
   */
  CMD.getToken = async function () {
    const { get_token: endpoint, userName, password } = config.pull_data_config;
    const credentials = {
      userName,
      verCode: "1234",
      password,
      loginType: "OPENAPI"
    };
    const { data: body } = await session.post(endpoint, credentials);
    return body;
  };
  /**
   * Requests one page of the material list for the most recent time window.
   *
   * The window ends now and starts `interval_minute` minutes earlier, where
   * `interval_minute` comes from `cur_timeRange` (60 when no range is active
   * or the configured value is not a positive number).
   *
   * The window length was previously computed as
   * `interval_minute * interval_minute * 1000`, which equals the intended
   * `interval_minute * 60 * 1000` only when the interval happens to be 60.
   *
   * @param {number} page - page index.
   * @param {number} [size=500] - page size.
   * @returns {Promise<*>} the body (`data`) of the HTTP response.
   */
  CMD.get_novel_material_list = async function (page, size = 500) {
    const DEFAULT_INTERVAL_MINUTES = 60;
    const MS_PER_MINUTE = 60 * 1000;

    const configured = cur_timeRange != null ? Number(cur_timeRange.interval_minute) : NaN;
    const interval_minute =
      Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_INTERVAL_MINUTES;

    const endTime = Date.now();
    const data = {
      startTime: endTime - interval_minute * MS_PER_MINUTE,
      endTime
    };

    // The configured endpoint is concatenated as-is, so it is expected to end
    // where the query string begins.
    const params = `page=${page}&size=${size}&sort=update_time,desc`;
    const url = config.pull_data_config.get_novel_material_list + params;
    console.log(url, data);

    const response = await session.post(url, data, {
      headers: {
        'token': await redis_help.getKeyValue("pull_data_token")
      }
    });
    return response.data;
  };
  /**
   * Requests the details of the given materials from the `get_detail` endpoint.
   *
   * The ids are sent as `{ list: materialId_list }` and the token stored under
   * "pull_data_token" in redis is sent in the `token` header.
   *
   * @param {Array} materialId_list - ids of the materials to look up.
   * @returns {Promise<*>} the body (`data`) of the HTTP response.
   */
  CMD.get_detail = async function (materialId_list) {
    const payload = { list: materialId_list };
    const { data: body } = await session.post(
      config.pull_data_config.get_detail,
      payload,
      { headers: { 'token': await redis_help.getKeyValue("pull_data_token") } }
    );
    return body;
  };
  /**
   * Bootstraps the puller.
   *
   * Creates the axios `session` (debug or release host depending on
   * `config.isDebug`) and registers a callback with `redis_help.connect`.
   * That callback:
   *  1. starts the config-update consumer (not awaited);
   *  2. loads the time ranges from the "PullDataConfig" redis value;
   *  3. fetches a login token and stores it under "pull_data_token";
   *  4. restores today's daily record into `cur_day_data` if one exists;
   *  5. starts the scheduler.
   *
   * Steps 2-4 are independent and each failure is logged on its own, so one
   * failing step no longer turns into an unhandled rejection that prevents
   * the scheduler from starting. A failed token fetch is tolerable because
   * `processTask` requests a fresh token on every run.
   *
   * @returns {Promise<void>}
   */
  CMD.init = async function () {
    session = axios.create({
      baseURL: config.isDebug ? config.pull_data_config.debug_host : config.pull_data_config.release_host,
      headers: {
        'Accept': 'application/json, text/plain, */*',
      }
    });

    // Runs one bootstrap step and logs (instead of propagating) its failure.
    const attempt = async (label, step) => {
      try {
        await step();
      } catch (error) {
        console.error(`PULL_DATA: init step "${label}" failed:`, error);
      }
    };

    redis_help.connect(async () => {
      // Deliberately not awaited, as before.
      startConsumer().catch((error) => console.error('启动消费者失败:', error));

      await attempt('load PullDataConfig', async () => {
        const parsed = JSON.parse(await redis_help.getKeyValue("PullDataConfig"));
        if (!Array.isArray(parsed)) {
          throw new TypeError('PullDataConfig must be a JSON array of time ranges');
        }
        _24HourRanges = parsed;
      });

      await attempt('fetch token', async () => {
        // The login body is read as `data.token` here and as `result.token` in
        // processTask; accept either shape — TODO confirm the real response format.
        const login = await CMD.getToken();
        const token = (login.data && login.data.token) || (login.result && login.result.token);
        if (!token) {
          throw new Error('login response does not contain a token');
        }
        await redis_help.setKeyValue("pull_data_token", token);
      });

      await attempt('restore daily record', async () => {
        const key = new Date().toISOString().slice(0, 10);
        const record_res = await manager.getRecord(key);
        if (record_res != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }
      });

      // Start the scheduler.
      await attempt('start scheduler', async () => {
        CMD.startScheduler(taskCallback);
      });
    });
  };
  // Generate the 24 hourly time ranges.
  /**
   * Returns a fresh array with one entry per hour of the day (0-23).
   *
   * Each entry has `name` (period label), `start` ("HH:mm"), `timestamp`,
   * `hour` and `timeFormat`. `timestamp` and `timeFormat` are a frozen
   * snapshot labelled 12/10/2024; they are not derived from the current date.
   *
   * NOTE(review): the entry for hour 20 starts at '20:45' while its
   * `timestamp`/`timeFormat` say 8:00:00 PM. Kept as-is; confirm whether
   * this is intentional.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   */
  CMD.generate24HourRanges = function () {
    const BASE_TIMESTAMP = 1733760000000; // timestamp of the hour-0 entry
    const HOUR_MS = 60 * 60 * 1000;

    // Period label for an hour of the day.
    const periodName = (hour) => {
      if (hour < 6) return '凌晨区间';
      if (hour < 9) return '早晨区间';
      if (hour < 12) return '上午区间';
      if (hour < 14) return '中午区间';
      if (hour < 18) return '下午区间';
      if (hour < 22) return '晚上区间';
      return '深夜区间';
    };

    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const clockHour = hour % 12 === 0 ? 12 : hour % 12;
      const meridiem = hour < 12 ? 'AM' : 'PM';
      ranges.push({
        name: periodName(hour),
        start: hour === 20 ? '20:45' : `${String(hour).padStart(2, '0')}:00`,
        timestamp: BASE_TIMESTAMP + hour * HOUR_MS,
        hour: hour,
        timeFormat: `12/10/2024, ${clockHour}:00:00 ${meridiem}`
      });
    }
    return ranges;
  };
  /**
   * Cancels every scheduled job and empties the `jobs` list.
   *
   * Entries without a usable job are skipped and a failing `cancel()` is
   * logged. `schedule.scheduleJob` can return `null` for a spec it cannot
   * schedule; a single such entry used to make this function throw, which
   * also blocked every later `CMD.startScheduler` call.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function () {
    jobs.forEach((entry) => {
      const job = entry && entry.job;
      if (!job || typeof job.cancel !== 'function') {
        return;
      }
      try {
        job.cancel();
      } catch (error) {
        console.error('PULL_DATA: failed to cancel job:', error);
      }
    });
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * (Re)creates one daily job per entry of `_24HourRanges`.
   *
   * Existing jobs are cancelled first via `CMD.stopScheduler`. For every range,
   * `start` ("HH:mm") is turned into the cron spec `"<minute> <hour> * * *"`.
   * When the job fires, `taskCallback` is awaited with
   * `{ timeRange, executionTime }`; errors thrown by it are logged.
   *
   * Ranges whose `start` does not yield an hour in 0-23 and a minute in 0-59
   * are skipped with a log entry, and so are specs for which
   * `schedule.scheduleJob` returns no job. Neither ends up in `jobs`.
   *
   * @param {function({timeRange: object, executionTime: Date}): (Promise<void>|void)} taskCallback
   * @returns {void}
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    const ranges = Array.isArray(_24HourRanges) ? _24HourRanges : [];
    ranges.forEach((timeRange) => {
      const start = timeRange != null ? timeRange.start : undefined;
      const parts = typeof start === 'string' ? start.split(':') : [];
      // Explicit radix: "08"/"09" must be read as decimal.
      const hour = Number.parseInt(parts[0], 10);
      const minute = Number.parseInt(parts[1], 10);
      const isValidTime =
        Number.isInteger(hour) && hour >= 0 && hour <= 23 &&
        Number.isInteger(minute) && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('PULL_DATA: skipping time range with invalid start:', timeRange);
        return;
      }

      // Daily job at the configured hour and minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('PULL_DATA: could not schedule time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Kick off the bootstrap; a failed start-up is logged instead of surfacing
  // as an unhandled promise rejection.
  CMD.init().catch((error) => {
    console.error('PULL_DATA: init failed:', error);
  });