// PULL_DATA.js
  // Namespace object: every public operation of this module is attached to it below.
  const CMD = {};
  // Delay in milliseconds between two consecutive page pulls (see processTask).
  const time_count = 5000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const pull_log_controllers = require('../src/data_manager/Controllers/pull_log_controllers');
  6. const helper = require('../src/helper');
  7. const axios = require('axios')
  8. const HttpClient = require('../src/HttpClient')
  9. const config = require('../etc/config.json');
  10. const DailyRecordManager = require('./daily_records');
  11. const schedule = require('node-schedule');
  12. const manager = new DailyRecordManager();
  13. const rabbitMq = require('../src/mq/rabbit-mq');
  // Shared mutable module state. `let` is used because several of these are
  // reassigned by the functions below (session, cur_page, _24HourRanges, jobs).

  // HTTP client instance; created in CMD.init.
  let session = null;
  // Page currently being pulled; -1 means "stop paging".
  let cur_page = 0;
  // Not referenced by the code visible in this file.
  let task_list = [];
  // Hourly time-range descriptors; filled in CMD.init from CMD.generate24HourRanges.
  let _24HourRanges = [];
  // Scheduled jobs as { job, timeRange } entries; managed by start/stopScheduler.
  let jobs = [];
  // Per-day records keyed by "YYYY-MM-DD" and then by range start time. It is
  // only ever accessed by string key, so it is a plain object, not an array.
  let cur_day_data = {};
  /**
   * Handler for messages delivered by the RabbitMQ consumer.
   *
   * Currently it only logs the message.
   * NOTE(review): an earlier revision forwarded msg.data.data to CMD.start_task,
   * which is not defined in this file — confirm whether that should be restored.
   *
   * @param {*} msg - The message as delivered by the consumer.
   * @returns {Promise<void>}
   */
  const messageHandler = async (msg) => {
    console.log("msg:", msg);
  };
  /**
   * Start the RabbitMQ consumer.
   *
   * Registers messageHandler through rabbitMq.consumerDirectMsg using the
   * exchange/routing-key strings below. A failure is logged and swallowed, so
   * the returned promise never rejects.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    try {
      await rabbitMq.consumerDirectMsg(
        messageHandler,
        "exchange_update_pull_config",
        "updatePullConig"
      );
    } catch (error) {
      console.error('启动消费者失败:', error);
    }
  }
  /**
   * Pull one page (index `cur_page`) of material data and persist it, then
   * schedule the next page after `time_count` milliseconds.
   *
   * Steps for one page:
   *   1. Refresh the API token and store it under "pull_data_token".
   *   2. Load the "FilterConfig" thresholds from redis.
   *   3. Fetch the page via CMD.get_novel_material_list.
   *   4. If `timeRange` is given, store the reported total count on today's
   *      record for that range and persist it via manager.updateRecord.
   *   5. Update the counters of materials that already exist; collect new
   *      materials that pass CMD.isRight.
   *   6. Fetch details for the collected ids and create a row for every detail
   *      entry with `hasAnchorInfo` set.
   *
   * Paging stops (cur_page is set to -1) when the API reports a non-zero
   * errorCode or returns no list. Any other failure is logged and the next
   * page is still scheduled. Follow-up pages are invoked by the timer without
   * arguments, so step 4 only runs for the call that received a timeRange.
   *
   * @param {{start: string}|null|undefined} timeRange - Range descriptor of the
   *   scheduled run, or null/undefined for follow-up pages.
   * @returns {Promise<void>} Never rejects; errors are logged.
   */
  async function processTask(timeRange) {
    // End-of-data is an expected outcome, not an exception: mark paging as
    // finished and emit the log line operators already know.
    const stopPaging = () => {
      cur_page = -1;
      console.log("PULL_DATA: error:", "没有数据了");
    };

    try {
      if (cur_page === -1) {
        stopPaging();
        return;
      }

      // The token must be read from the *resolved* login response. Reading
      // `.result` off the pending promise threw a TypeError on every call.
      const tokenResponse = await CMD.getToken();
      await redis_help.setKeyValue("pull_data_token", tokenResponse.result.token);

      const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));

      const pageResponse = await CMD.get_novel_material_list(cur_page);
      // Loose comparison kept on purpose so that a string "0" still counts as success.
      // eslint-disable-next-line eqeqeq
      if (pageResponse.errorCode != 0) {
        console.log("errorCode:", pageResponse.errorCode);
        stopPaging();
        return;
      }
      const pageResult = pageResponse.result;
      const materials = pageResult ? pageResult.list : undefined;
      if (!Array.isArray(materials) || materials.length === 0) {
        stopPaging();
        return;
      }

      if (timeRange != null) {
        const today = new Date().toISOString().split('T')[0];
        const dayRecord = cur_day_data[today];
        if (dayRecord && dayRecord[timeRange.start]) {
          console.log("response.result.count:", pageResult.count);
          console.log("(cur_day_data[today][timeRange.start]:", dayRecord[timeRange.start]);
          dayRecord[timeRange.start].count = pageResult.count;
        } else {
          console.log("cur_day_data空");
        }
        // Without a bucket for today there is nothing to persist.
        if (dayRecord) {
          await manager.updateRecord(today, dayRecord);
        }
      }

      const newMaterialIds = [];
      const countersById = new Map();
      for (const material of materials) {
        const existing = await origin_data_controllers.getOriginData({
          materialId: material.materialId,
        });
        if (existing.success) {
          await origin_data_controllers.updateOriginData(
            { id: existing.data.id },
            {
              kepp_num: material.favoriteCount.count,
              comment_num: material.commentCount.count,
              like_num: material.likeCount.count,
              shared_num: material.shareCount.count,
            }
          );
        } else if (CMD.isRight(FilterConfig, material)) {
          newMaterialIds.push(material.materialId);
          countersById.set(material.materialId, {
            kepp_num: material.favoriteCount.count,
            comment_num: material.commentCount.count,
            like_num: material.likeCount.count,
            shared_num: material.shareCount.count,
          });
        }
      }

      // Nothing new on this page: skip the detail request entirely.
      if (newMaterialIds.length === 0) {
        return;
      }

      const detailResponse = await CMD.get_detail(newMaterialIds);
      const details = (detailResponse.result && detailResponse.result.list) || [];
      for (const detail of details) {
        const counters = countersById.get(detail.materialId);
        // Skip entries without anchor info, and entries that were not requested.
        if (!detail.hasAnchorInfo || !counters) {
          continue;
        }
        await origin_data_controllers.createOriginData({
          video_id: detail.awemeId,
          materialId: detail.materialId,
          video_link: detail.awemeUrl,
          title: CMD.subTitle(detail.title),
          publish_time: new Date(detail.publishTime),
          kepp_num: counters.kepp_num,
          comment_num: counters.comment_num,
          like_num: counters.like_num,
          shared_num: counters.shared_num,
          is_guajian: 1,
          guajian_link: detail.landingUrl,
          status: 0,
        });
      }
    } catch (e) {
      console.log("PULL_DATA: error:", e);
    } finally {
      // Keep paging until one of the stop conditions above has set cur_page to -1.
      if (cur_page !== -1) {
        cur_page++;
        global.setTimeout(processTask, time_count);
      }
    }
  }
  /**
   * Truncate a title to at most `maxLength` UTF-16 code units.
   *
   * Non-string values (for example a missing title) are returned unchanged
   * instead of throwing. If the cut would fall in the middle of a surrogate
   * pair, the dangling high surrogate is dropped so the result stays
   * well-formed text.
   *
   * @param {*} title - The title to shorten.
   * @param {number} [maxLength=299] - Maximum length of the returned string.
   * @returns {*} The shortened title, or the input itself when it is not a string.
   */
  CMD.subTitle = function (title, maxLength = 299) {
    if (typeof title !== 'string' || title.length <= maxLength) {
      return title;
    }
    let shortened = title.substring(0, maxLength);
    const lastUnit = shortened.charCodeAt(shortened.length - 1);
    // 0xD800-0xDBFF is the high-surrogate range; on its own it is half a character.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      shortened = shortened.substring(0, shortened.length - 1);
    }
    return shortened;
  };
  /**
   * Decide whether a material meets every minimum counter in the filter config.
   *
   * Each counter is read from `origin_element.<field>.count` and compared with
   * the matching `FilterConfig` entry. The checks run in the order likes,
   * favorites, shares, comments and stop at the first counter that is below
   * its threshold. A threshold that is absent never rejects a material.
   *
   * @param {{like_num: *, kepp_num: *, shared_num: *, comment_num: *}} FilterConfig
   *   Minimum values per counter.
   * @param {object} origin_element - Material entry carrying likeCount,
   *   favoriteCount, shareCount and commentCount objects.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function (FilterConfig, origin_element) {
    // [counter field on the material, threshold key in the config]
    const checks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num'],
    ];
    return checks.every(
      ([counterField, thresholdKey]) =>
        !(origin_element[counterField].count < FilterConfig[thresholdKey])
    );
  };
  /**
   * Log in to the data API and return the response payload.
   *
   * Posts the configured credentials to `config.pull_data_config.get_token`
   * through the shared `session` client. The token itself is deliberately not
   * written to the log.
   *
   * @returns {Promise<object>} The response body; callers read `result.token`.
   * @throws {Error} When the response body has no `result` object. Request
   *   failures raised by the HTTP client propagate unchanged.
   */
  CMD.getToken = async function () {
    const pullConfig = config.pull_data_config;
    const response = await session.post(pullConfig.get_token, {
      userName: pullConfig.userName,
      verCode: "1234",
      password: pullConfig.password,
      loginType: "OPENAPI",
    });
    const payload = response.data;
    if (!payload || !payload.result) {
      throw new Error("getToken: login response contains no result");
    }
    // Report only whether a token came back; never log the credential itself.
    console.log("getToken:", payload.result.token ? "token received" : "token missing");
    return payload;
  };
  /**
   * Fetch one page of materials updated within a recent time window.
   *
   * The window ends now and starts `lookbackMs` earlier; both bounds are sent
   * as epoch milliseconds in the request body. Paging and sorting go into the
   * query string, which is appended to the configured endpoint as-is. The
   * token is read from redis under "pull_data_token".
   *
   * @param {number} page - Page index to request.
   * @param {number} [size=500] - Page size.
   * @param {number} [lookbackMs=3600000] - Window length in milliseconds (one hour by default).
   * @returns {Promise<object>} The response body.
   */
  CMD.get_novel_material_list = async function (page, size = 500, lookbackMs = 60 * 60 * 1000) {
    const endTime = Date.now();
    const data = {
      startTime: endTime - lookbackMs,
      endTime,
    };
    const params = `page=${page}&size=${size}&sort=update_time,desc`;
    const url = config.pull_data_config.get_novel_material_list + params;
    console.log(url, data);
    const token = await redis_help.getKeyValue("pull_data_token");
    const response = await session.post(url, data, {
      headers: { token },
    });
    return response.data;
  };
  /**
   * Fetch detail records for a list of material ids.
   *
   * Posts `{ list: materialId_list }` to `config.pull_data_config.get_detail`
   * with the token read from redis under "pull_data_token".
   *
   * @param {Array} materialId_list - Material ids to look up.
   * @returns {Promise<object>} The response body.
   */
  CMD.get_detail = async function (materialId_list) {
    const token = await redis_help.getKeyValue("pull_data_token");
    const response = await session.post(
      config.pull_data_config.get_detail,
      { list: materialId_list },
      { headers: { token } }
    );
    return response.data;
  };
  /**
   * Initialise the module and start the hourly pull schedule.
   *
   * Creates the shared HTTP `session` (debug or release host depending on
   * `config.isDebug`), then hands a callback to `redis_help.connect` that
   * logs in, stores the token, builds the hourly ranges, restores today's
   * record if one exists, and starts the scheduler. A failure inside that
   * callback is logged instead of surfacing as an unhandled rejection.
   *
   * The RabbitMQ consumer (startConsumer) is not started from here.
   *
   * @returns {Promise<void>} Resolves once the connect callback has been registered.
   */
  CMD.init = async function () {
    const pullConfig = config.pull_data_config;
    session = axios.create({
      baseURL: config.isDebug ? pullConfig.debug_host : pullConfig.release_host,
      headers: {
        'Accept': 'application/json, text/plain, */*',
      },
    });

    // Runs for every scheduled time range.
    const taskCallback = async (context) => {
      const { timeRange, executionTime } = context;
      console.log('执行任务:', {
        时间区间: timeRange.name,
        开始时间: timeRange.start,
        执行时间: executionTime,
      });

      const today = new Date().toISOString().split('T')[0];
      // Drop the buckets of previous days so the store does not grow without bound.
      for (const day of Object.keys(cur_day_data)) {
        if (day !== today) {
          delete cur_day_data[day];
        }
      }
      if (!cur_day_data[today]) {
        cur_day_data[today] = {};
      }
      if (!cur_day_data[today][timeRange.start]) {
        cur_day_data[today][timeRange.start] = { name: timeRange.name };
      }
      console.log("cur_day_data[today]:", cur_day_data[today]);
      // The outcome of createRecord is not inspected.
      await manager.createRecord(today, cur_day_data[today]);

      // Every scheduled run starts paging from the first page.
      cur_page = 0;
      await processTask(timeRange);
    };

    redis_help.connect(async () => {
      try {
        const response = await CMD.getToken();
        await redis_help.setKeyValue("pull_data_token", response.result.token);
        _24HourRanges = CMD.generate24HourRanges();

        // Restore today's record, if any, before the first job fires.
        const key = new Date().toISOString().split('T')[0];
        const record_res = await manager.getRecord(key);
        if (record_res != null) {
          cur_day_data[key] = record_res.content;
        }

        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error('PULL_DATA: init failed:', error);
      }
    });
  };
  /**
   * Build the 24 hourly time-range descriptors used by the scheduler.
   *
   * One entry is produced per hour of the current local day. `start` is always
   * on the hour ("HH:00"), so it agrees with `hour`, `timestamp` and
   * `timeFormat`. The previous hard-coded table carried the start times
   * "17:52" and "19:05" for hours 17 and 19, contradicting its own `hour` and
   * `timeFormat` fields, and timestamps frozen at 2024-12-10.
   *
   * Period names by hour: 0-5 凌晨区间, 6-8 早晨区间, 9-11 上午区间,
   * 12-13 中午区间, 14-17 下午区间, 18-21 晚上区间, 22-23 深夜区间.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   *   24 entries ordered by hour.
   */
  CMD.generate24HourRanges = function () {
    // [first hour not covered by the period, period name], in ascending order.
    const periods = [
      [6, '凌晨区间'],
      [9, '早晨区间'],
      [12, '上午区间'],
      [14, '中午区间'],
      [18, '下午区间'],
      [22, '晚上区间'],
      [24, '深夜区间'],
    ];
    const now = new Date();
    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const moment = new Date(now.getFullYear(), now.getMonth(), now.getDate(), hour);
      const timestamp = moment.getTime();
      ranges.push({
        name: periods.find(([endHour]) => hour < endHour)[1],
        start: `${String(hour).padStart(2, '0')}:00`,
        timestamp,
        hour,
        // Same human-readable layout as the former table, e.g. "12/10/2024, 5:00:00 PM".
        timeFormat: new Date(timestamp).toLocaleString('en-US'),
      });
    }
    return ranges;
  };
  /**
   * Cancel every scheduled job and empty the job list.
   *
   * Entries without a job object are skipped rather than aborting the loop, so
   * the remaining jobs are still cancelled.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function () {
    for (const entry of jobs) {
      if (entry && entry.job) {
        entry.job.cancel();
      }
    }
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  };
  /**
   * Schedule one daily job per entry of `_24HourRanges`.
   *
   * Any previously scheduled jobs are cancelled first. The hour and minute of
   * each job come from the entry's "HH:MM" `start` string. Entries whose start
   * time cannot be parsed, or for which no job could be created, are logged
   * and skipped. Errors thrown by the callback are logged and do not affect
   * later runs.
   *
   * @param {function({timeRange: object, executionTime: Date}): Promise<*>} taskCallback
   *   Invoked each time a job fires.
   * @returns {void}
   */
  CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    for (const timeRange of _24HourRanges) {
      const [hourText, minuteText] = timeRange.start.split(':');
      const hour = Number.parseInt(hourText, 10);
      const minute = Number.parseInt(minuteText, 10);
      if (Number.isNaN(hour) || Number.isNaN(minute)) {
        console.error(`PULL_DATA: invalid start time, job skipped: ${timeRange.start}`);
        continue;
      }

      // Cron fields: minute hour day-of-month month day-of-week.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date(),
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error(`PULL_DATA: could not schedule job for ${timeRange.start}`);
        continue;
      }

      jobs.push({ job, timeRange });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    }

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  };
  // Module entry point: start everything as soon as the file is loaded. The
  // rejection handler keeps a failed start-up from becoming an unhandled rejection.
  CMD.init().catch((error) => {
    console.error('PULL_DATA: init failed:', error);
  });