// PULL_DATA.js
  /** Namespace object: the puller's operations are attached to it as properties. */
  const CMD = {};

  /** Delay, in milliseconds, passed to setTimeout between two consecutive page pulls. */
  const time_count = 5 * 1000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const pull_log_controllers = require('../src/data_manager/Controllers/pull_log_controllers');
  6. const helper = require('../src/helper');
  7. const axios = require('axios')
  8. const HttpClient = require('../src/HttpClient')
  9. const config = require('../etc/config.json');
  10. const DailyRecordManager = require('./daily_records');
  11. const schedule = require('node-schedule');
  /** Store used to create, update and read the per-day record of pull statistics. */
  const manager = new DailyRecordManager();

  /** HTTP client used for every request; assigned in CMD.init(). */
  let session = null;

  /** Zero-based page that processTask pulls next; -1 means paging has stopped. */
  let cur_page = 0;

  /** Not referenced anywhere else in this file. */
  let task_list = [];

  /** Time slots produced by CMD.generate24HourRanges(); assigned in CMD.init(). */
  let _24HourRanges = [];

  /** One `{ job, timeRange }` entry per scheduled job; filled by CMD.startScheduler(). */
  let jobs = [];

  /**
   * In-memory copy of the daily records, keyed by ISO date string
   * (`YYYY-MM-DD`) and then by slot start (`HH:mm`).
   * It is a dictionary, so it is a plain object rather than an array.
   */
  let cur_day_data = {};
  /**
   * Copies the four engagement counters of a listing item into the field
   * names used when writing origin-data rows.
   *
   * @param {object} item - One entry of the listing response.
   * @returns {{kepp_num: *, comment_num: *, like_num: *, shared_num: *}}
   */
  function pickEngagementCounts(item) {
    return {
      kepp_num: item.favoriteCount.count,
      comment_num: item.commentCount.count,
      like_num: item.likeCount.count,
      shared_num: item.shareCount.count
    };
  }

  /**
   * Tells whether a listing response succeeded and carries at least one item.
   * A non-zero errorCode is logged.
   *
   * @param {object} response - Body returned by CMD.get_novel_material_list.
   * @returns {boolean}
   */
  function listingHasItems(response) {
    // Loose comparison on purpose: "0" and 0 both count as success.
    if (response.errorCode != 0) {
      console.log("errorCode:", response.errorCode);
      return false;
    }
    const list = response.result == null ? undefined : response.result.list;
    return list != undefined && list.length > 0;
  }

  /**
   * Writes the listing's total count into today's record for the given slot
   * and persists the day record.
   *
   * When no record exists for today (for example when the date changed
   * between the scheduler callback and this call) nothing is persisted;
   * previously that case raised a TypeError and the whole page was skipped.
   *
   * @param {{start: string}} timeRange - Slot whose counter is updated.
   * @param {object} result - `result` member of the listing response.
   */
  async function recordSlotCount(timeRange, result) {
    const today = new Date().toISOString().split('T')[0];
    console.log("response.result:", result);
    const dayRecord = cur_day_data[today];
    const slot = dayRecord ? dayRecord[timeRange.start] : undefined;
    if (slot) {
      console.log("response.result.count:", result.count);
      console.log("(cur_day_data[today][timeRange.start]:", slot);
      slot.count = result.count;
    } else {
      console.log("cur_day_data空");
    }
    if (dayRecord) {
      await manager.updateRecord(today, dayRecord);
    }
  }

  /**
   * Pulls the page numbered `cur_page`: refreshes the counters of materials
   * that are already stored and creates rows for new materials that pass the
   * filter and carry anchor info.
   *
   * @param {?object} timeRange - Slot to record the total count for, if any.
   * @returns {Promise<boolean>} false when the listing has nothing (more) to offer.
   */
  async function pullCurrentPage(timeRange) {
    const filterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));
    const listing = await CMD.get_novel_material_list(cur_page);
    if (!listingHasItems(listing)) {
      return false;
    }
    if (timeRange != null) {
      await recordSlotCount(timeRange, listing.result);
    }

    // Ids of materials not stored yet, plus their counters keyed by id.
    const newIds = [];
    const countsById = Object.create(null);
    for (const item of listing.result.list) {
      const existing = await origin_data_controllers.getOriginData({
        materialId: item.materialId
      });
      if (existing.success) {
        await origin_data_controllers.updateOriginData(
          { id: existing.data.id },
          pickEngagementCounts(item)
        );
      } else if (CMD.isRight(filterConfig, item)) {
        newIds.push(item.materialId);
        countsById[item.materialId] = pickEngagementCounts(item);
      }
    }

    // Nothing new on this page: skip the detail request entirely.
    if (newIds.length === 0) {
      return true;
    }

    const detail = await CMD.get_detail(newIds);
    for (const element of detail.result.list) {
      if (!element.hasAnchorInfo) {
        continue;
      }
      const info = countsById[element.materialId];
      if (info === undefined) {
        // A detail row that was never requested must not abort the other inserts.
        console.log("PULL_DATA: error:", "get_detail returned unknown materialId:", element.materialId);
        continue;
      }
      await origin_data_controllers.createOriginData({
        video_id: element.awemeId,
        materialId: element.materialId,
        video_link: element.awemeUrl,
        title: CMD.subTitle(element.title),
        publish_time: new Date(element.publishTime),
        kepp_num: info.kepp_num,
        comment_num: info.comment_num,
        like_num: info.like_num,
        shared_num: info.shared_num,
        is_guajian: 1,
        guajian_link: element.landingUrl,
        status: 0
      });
    }
    return true;
  }

  /**
   * Pulls one page of the material listing and, unless paging has stopped,
   * schedules itself for the next page after `time_count` milliseconds.
   *
   * Paging stops (`cur_page` becomes -1) when the listing reports an error
   * code or returns no items. Any other failure is logged and the next page
   * is still scheduled. Follow-up runs are started by the timer without
   * arguments, so the slot counter is only recorded for the first page.
   *
   * @param {?{start: string, name: string}} timeRange - Slot that triggered
   *   the run; omit it to skip the daily-record update.
   * @returns {Promise<void>}
   */
  async function processTask(timeRange) {
    try {
      const hasMore = cur_page != -1 && await pullCurrentPage(timeRange);
      if (!hasMore) {
        cur_page = -1;
        console.log("PULL_DATA: error:", "没有数据了");
      }
    } catch (e) {
      console.log("PULL_DATA: error:", e);
    } finally {
      if (cur_page != -1) {
        cur_page++;
        global.setTimeout(processTask, time_count);
      }
    }
  }
  /**
   * Shortens a title to at most `maxLength` UTF-16 code units.
   *
   * The cut never lands in the middle of a surrogate pair: when the last
   * kept unit would be a lone high surrogate it is dropped as well, so the
   * result is always well-formed text. Values that are not strings (for
   * example a missing title) are returned unchanged instead of throwing.
   *
   * @param {*} title - Title to shorten.
   * @param {number} [maxLength=299] - Maximum length of the result.
   * @returns {*} The shortened string, or `title` itself when no cut is needed.
   */
  CMD.subTitle = function(title, maxLength = 299) {
    if (typeof title !== 'string' || title.length <= maxLength) {
      return title;
    }
    let end = maxLength;
    const lastKept = title.charCodeAt(end - 1);
    // 0xD800-0xDBFF is the high-surrogate range: the first half of a pair.
    if (lastKept >= 0xD800 && lastKept <= 0xDBFF) {
      end -= 1;
    }
    return title.substring(0, end);
  }
  /**
   * Checks a listing item against the configured minimum counters.
   *
   * The item is rejected as soon as one of its like, favorite, share or
   * comment counts (tested in that order) is below the matching threshold.
   *
   * @param {object} FilterConfig - Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element - Listing item carrying the `*Count.count` values.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function(FilterConfig, origin_element) {
    const rules = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    const hasShortfall = rules.some(
      ([counter, threshold]) => origin_element[counter].count < FilterConfig[threshold]
    );
    return !hasShortfall;
  }
  /**
   * Logs in with the configured account and returns the login response body.
   *
   * @returns {Promise<*>} The `data` member of the HTTP response.
   */
  CMD.getToken = async function() {
    const { data } = await session.post(config.pull_data_config.get_token, {
      userName: config.pull_data_config.userName,
      verCode: "1234",
      password: config.pull_data_config.password,
      loginType: "OPENAPI"
    });
    return data;
  }
  /**
   * Requests one page of the material listing for the last 60 minutes,
   * sorted by update time in descending order.
   *
   * The query string is appended directly to the configured URL, and the
   * token stored under "pull_data_token" is sent in the `token` header.
   *
   * @param {number} page - Page number to request.
   * @param {number} [size=500] - Page size.
   * @returns {Promise<*>} The `data` member of the HTTP response.
   */
  CMD.get_novel_material_list = async function(page, size = 500) {
    const ONE_HOUR_MS = 60 * 60 * 1000;
    const endTime = Date.now();
    const timeWindow = {
      startTime: endTime - ONE_HOUR_MS,
      endTime: endTime
    };
    const url = `${config.pull_data_config.get_novel_material_list}page=${page}&size=${size}&sort=update_time,desc`;
    console.log(url, timeWindow);
    const { data } = await session.post(url, timeWindow, {
      headers: {
        'token': await redis_help.getKeyValue("pull_data_token")
      }
    });
    return data;
  }
  /**
   * Requests the details of the given materials.
   *
   * The ids are posted as `{ list: [...] }`, and the token stored under
   * "pull_data_token" is sent in the `token` header.
   *
   * @param {Array} materialId_list - Ids to look up.
   * @returns {Promise<*>} The `data` member of the HTTP response.
   */
  CMD.get_detail = async function(materialId_list) {
    const { data } = await session.post(
      config.pull_data_config.get_detail,
      { list: materialId_list },
      {
        headers: {
          'token': await redis_help.getKeyValue("pull_data_token")
        }
      }
    );
    return data;
  }
  /**
   * Boots the puller.
   *
   * Creates the HTTP session for the debug or release host and registers a
   * callback with `redis_help.connect`. When that callback runs it logs in,
   * stores the token under "pull_data_token", builds the time slots, loads
   * today's persisted record into `cur_day_data` and starts the scheduler.
   * The token is stored once here; nothing in this function refreshes it.
   *
   * NOTE(review): day keys come from `toISOString()`, i.e. the UTC date.
   * Confirm this matches the clock the scheduler fires on.
   *
   * @returns {Promise<void>}
   */
  CMD.init = async function() {
    const pullConfig = config.pull_data_config;
    session = axios.create({
      baseURL: config.isDebug ? pullConfig.debug_host : pullConfig.release_host,
      headers: {
        'Accept': 'application/json, text/plain, */*',
      }
    });

    const currentDay = () => new Date().toISOString().split('T')[0];

    // Runs on every scheduled slot: prepares today's record, then pulls from page 0.
    const taskCallback = async (context) => {
      const { timeRange, executionTime } = context;
      console.log('执行任务:', {
        时间区间: timeRange.name,
        开始时间: timeRange.start,
        执行时间: executionTime
      });

      const today = currentDay();
      // Drop the records of previous days so the cache only ever holds today.
      for (const day of Object.keys(cur_day_data)) {
        if (day != today) {
          delete cur_day_data[day];
        }
      }
      if (!cur_day_data[today]) {
        cur_day_data[today] = {};
      }
      if (!cur_day_data[today][timeRange.start]) {
        cur_day_data[today][timeRange.start] = { name: timeRange.name };
      }
      console.log("cur_day_data[today]:", cur_day_data[today]);
      await manager.createRecord(today, cur_day_data[today]);

      cur_page = 0;
      await processTask(timeRange);
    };

    redis_help.connect(async () => {
      const response = await CMD.getToken();
      console.log("getToken:", response.result.token);
      await redis_help.setKeyValue("pull_data_token", response.result.token);

      _24HourRanges = CMD.generate24HourRanges();

      // Resume from what was already persisted for today, if anything.
      const day = currentDay();
      const record_res = await manager.getRecord(day);
      if (record_res != null) {
        cur_day_data[day] = record_res.content;
      }

      CMD.startScheduler(taskCallback);
    });
  }
  /**
   * Builds the 24 hourly time slots of the current day.
   *
   * Every slot starts on the full hour ("00:00" ... "23:00"). The previous
   * hard-coded table started hour 17 at "17:52" and carried timestamps frozen
   * at 2024-12-10; both are now derived from the hour and the current date.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   *   `name` is the label of the day period the hour belongs to, `start` the
   *   "HH:mm" start time, `timestamp` the epoch milliseconds of that hour
   *   today in local time, and `timeFormat` a readable en-US rendering of it.
   */
  CMD.generate24HourRanges = function() {
    // Day periods, each covering the hours below `until` not taken by an earlier one.
    const periods = [
      { until: 6, name: '凌晨区间' },
      { until: 9, name: '早晨区间' },
      { until: 12, name: '上午区间' },
      { until: 14, name: '中午区间' },
      { until: 18, name: '下午区间' },
      { until: 22, name: '晚上区间' },
      { until: 24, name: '深夜区间' }
    ];
    const now = new Date();
    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
      const timestamp = new Date(
        now.getFullYear(), now.getMonth(), now.getDate(), hour
      ).getTime();
      ranges.push({
        name: periods.find((period) => hour < period.until).name,
        start: `${String(hour).padStart(2, '0')}:00`,
        timestamp,
        hour,
        timeFormat: new Date(timestamp).toLocaleString('en-US')
      });
    }
    return ranges;
  }
  /**
   * Registers one daily job per entry of `_24HourRanges`.
   *
   * Each job fires at the hour and minute parsed from the slot's "HH:mm"
   * start and calls `taskCallback({ timeRange, executionTime })`. Errors
   * raised by the callback are logged and do not propagate. Every created
   * job is appended to `jobs` together with its slot.
   *
   * @param {function(object): Promise<*>} taskCallback - Work to run for a slot.
   */
  CMD.startScheduler = function(taskCallback) {
    for (const timeRange of _24HourRanges) {
      const [hourText, minuteText] = timeRange.start.split(':');
      const hour = parseInt(hourText);
      const minute = parseInt(minuteText);

      const runSlot = async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      };

      // Cron fields: minute hour day-of-month month day-of-week.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, runSlot);
      jobs.push({ job, timeRange });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    }
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  }
  // Start the puller as soon as this module is loaded.
  CMD.init();