/**
 * Hourly "pull data" worker.
 *
 * On start-up it logs in to the remote API, stores the token in Redis, loads
 * today's daily record and registers one node-schedule job per hour. Each job
 * pages through the materials updated during the last 60 minutes, refreshes
 * the counters of materials that are already stored and creates rows for new
 * materials that pass the filter configured under the Redis key "FilterConfig".
 */
const CMD = {};

// Delay in milliseconds between two consecutive page pulls.
const time_count = 5000;

const redis_help = require('../src/use_redis');
const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
const pull_log_controllers = require('../src/data_manager/Controllers/pull_log_controllers');
const helper = require('../src/helper');
const axios = require('axios');
const HttpClient = require('../src/HttpClient');
const config = require('../etc/config.json');
const DailyRecordManager = require('./daily_records');
const schedule = require('node-schedule');
const manager = new DailyRecordManager();
const rabbitMq = require('../src/mq/rabbit-mq');

// axios instance created by CMD.init().
let session = null;
// Page currently being pulled; -1 means "the current run has no more data".
let cur_page = 0;
let task_list = [];
// Hour slots produced by CMD.generate24HourRanges().
let _24HourRanges = [];
// Scheduled jobs as { job, timeRange } pairs.
let jobs = [];
// Per-day bookkeeping: { [day]: { [timeRange.start]: { name, count } } }.
let cur_day_data = {};

// Text logged whenever paging stops because a run is exhausted.
const NO_MORE_DATA = "没有数据了";

// Period label for each hour; an entry covers every hour below `until`.
const DAY_PERIODS = [
    { until: 6, name: '凌晨区间' },
    { until: 9, name: '早晨区间' },
    { until: 12, name: '上午区间' },
    { until: 14, name: '中午区间' },
    { until: 18, name: '下午区间' },
    { until: 22, name: '晚上区间' },
    { until: 24, name: '深夜区间' },
];

/**
 * Key used for the daily record of the current day (YYYY-MM-DD).
 *
 * NOTE(review): this is the UTC date while the scheduler below fires in the
 * server's local time zone; on a non-UTC server the first/last hours of the
 * local day are filed under the neighbouring date - confirm this is intended.
 */
function todayKey() {
    return new Date().toISOString().split('T')[0];
}

/** Maps the counters of a material-list element to the stored column names. */
function extractCounters(element) {
    return {
        kepp_num: element.favoriteCount.count,
        comment_num: element.commentCount.count,
        like_num: element.likeCount.count,
        shared_num: element.shareCount.count,
    };
}

/** Marks the current run as exhausted so that no further page is scheduled. */
function stopPaging() {
    cur_page = -1;
    console.log("PULL_DATA: error:", NO_MORE_DATA);
}

/**
 * RabbitMQ message handler. It currently only logs the message; the original
 * call to CMD.start_task(msg['data']['data']) is disabled and CMD.start_task
 * is not defined in this file.
 */
const messageHandler = async (msg) => {
    console.log("msg:", msg);
};

/** Starts the RabbitMQ consumer; a failure is logged and not rethrown. */
async function startConsumer() {
    try {
        await rabbitMq.consumerDirectMsg(messageHandler, "exchange_update_pull_config", "updatePullConig");
    } catch (error) {
        console.error('启动消费者失败:', error);
    }
}

/**
 * Stores the total count reported by the API in the slot of `timeRange` and
 * persists today's record.
 */
async function recordRangeCount(timeRange, result) {
    const today = todayKey();
    const dayData = cur_day_data[today];
    console.log("response.result:", result);
    if (dayData && dayData[timeRange.start]) {
        console.log("response.result.count:", result.count);
        console.log("(cur_day_data[today][timeRange.start]:", dayData[timeRange.start]);
        dayData[timeRange.start].count = result.count;
    } else {
        console.log("cur_day_data空");
    }
    if (dayData) {
        await manager.updateRecord(today, dayData);
    }
}

/**
 * Fetches the details of the given new materials and stores every one that
 * carries anchor info.
 */
async function storeNewMaterials(materialIds, countersById) {
    if (materialIds.length === 0) {
        return; // nothing new on this page, skip the detail request
    }
    const detail = await CMD.get_detail(materialIds);
    const detailList = detail && detail.result ? detail.result.list : undefined;
    if (!Array.isArray(detailList)) {
        console.log("PULL_DATA: get_detail returned no list, errorCode:", detail && detail.errorCode);
        return;
    }
    for (const element of detailList) {
        const info = countersById[element.materialId];
        if (!element.hasAnchorInfo || !info) {
            continue;
        }
        await origin_data_controllers.createOriginData({
            video_id: element.awemeId,
            materialId: element.materialId,
            video_link: element.awemeUrl,
            title: CMD.subTitle(element.title),
            publish_time: new Date(element.publishTime),
            kepp_num: info.kepp_num,
            comment_num: info.comment_num,
            like_num: info.like_num,
            shared_num: info.shared_num,
            is_guajian: 1,
            guajian_link: element.landingUrl,
            status: 0,
        });
    }
}

/**
 * Pulls and stores the page `cur_page`. Calls stopPaging() when the API
 * reports an error code or returns no items.
 */
async function pullPage(timeRange) {
    // The promise has to be awaited before `.result` is read; reading it from
    // the pending promise threw on every call.
    const tokenResponse = await CMD.getToken();
    await redis_help.setKeyValue("pull_data_token", tokenResponse.result.token);

    const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));

    const response = await CMD.get_novel_material_list(cur_page);
    // Loose comparison kept on purpose: the type of errorCode is not visible here.
    if (response.errorCode != 0) {
        console.log("errorCode:", response.errorCode);
        stopPaging();
        return;
    }
    const list = response.result ? response.result.list : undefined;
    if (list == null || list.length <= 0) {
        stopPaging();
        return;
    }

    if (timeRange != null) {
        await recordRangeCount(timeRange, response.result);
    }

    const newMaterialIds = [];
    // Plain dictionary so that ids are matched by their string form, whatever
    // type the two endpoints use for materialId.
    const countersById = Object.create(null);
    for (const item of list) {
        const stored = await origin_data_controllers.getOriginData({ materialId: item.materialId });
        if (stored.success) {
            await origin_data_controllers.updateOriginData({ id: stored.data.id }, extractCounters(item));
        } else if (CMD.isRight(FilterConfig, item)) {
            newMaterialIds.push(item.materialId);
            countersById[item.materialId] = extractCounters(item);
        }
    }

    await storeNewMaterials(newMaterialIds, countersById);
}

/**
 * Pulls one page and, unless the run is exhausted, schedules the next page
 * after `time_count` milliseconds.
 *
 * @param {{name: string, start: string}} [timeRange] Hour slot of the run.
 *   Follow-up pages are scheduled without it, so the slot count is recorded
 *   once per run.
 */
async function processTask(timeRange) {
    try {
        if (cur_page === -1) {
            console.log("PULL_DATA: error:", NO_MORE_DATA);
            return;
        }
        await pullPage(timeRange);
    } catch (e) {
        console.log("PULL_DATA: error:", e);
    } finally {
        if (cur_page !== -1) {
            cur_page++;
            global.setTimeout(processTask, time_count);
        }
    }
}

/**
 * Scheduler callback: prepares today's bookkeeping for the slot, persists it
 * and starts a new pull run from page 0.
 *
 * @param {{timeRange: object, executionTime: Date}} context
 */
async function runScheduledPull(context) {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
        时间区间: timeRange.name,
        开始时间: timeRange.start,
        执行时间: executionTime
    });

    const today = todayKey();
    // Drop the bookkeeping of previous days.
    for (const key of Object.keys(cur_day_data)) {
        if (key !== today) {
            delete cur_day_data[key];
        }
    }
    if (!cur_day_data[today]) {
        cur_day_data[today] = {};
    }
    if (!cur_day_data[today][timeRange.start]) {
        cur_day_data[today][timeRange.start] = { name: timeRange.name };
    }
    console.log("cur_day_data[today]:", cur_day_data[today]);
    await manager.createRecord(today, cur_day_data[today]);

    cur_page = 0;
    await processTask(timeRange);
}

/**
 * Truncates a title to at most 299 characters.
 *
 * @param {string} title
 * @returns {string} The (possibly shortened) title; non-string values are
 *   returned unchanged.
 */
CMD.subTitle = function (title) {
    if (typeof title !== 'string') {
        return title;
    }
    return title.length > 299 ? title.substring(0, 299) : title;
};

/**
 * Tells whether a material reaches every threshold of the filter.
 *
 * @param {object} FilterConfig Thresholds like_num, kepp_num, shared_num and comment_num.
 * @param {object} origin_element Element of the material list.
 * @returns {boolean} false as soon as one counter is below its threshold.
 */
CMD.isRight = function (FilterConfig, origin_element) {
    if (origin_element.likeCount.count < FilterConfig.like_num) {
        return false;
    }
    if (origin_element.favoriteCount.count < FilterConfig.kepp_num) {
        return false;
    }
    if (origin_element.shareCount.count < FilterConfig.shared_num) {
        return false;
    }
    if (origin_element.commentCount.count < FilterConfig.comment_num) {
        return false;
    }
    return true;
};

/**
 * Logs in with the configured credentials.
 *
 * @returns {Promise<object>} Response body; callers read `result.token`.
 */
CMD.getToken = async function () {
    const response = await session.post(config.pull_data_config.get_token, {
        userName: config.pull_data_config.userName,
        verCode: "1234",
        password: config.pull_data_config.password,
        loginType: "OPENAPI"
    });
    // NOTE(review): this writes the auth token to the log - confirm that is acceptable.
    const result = response.data ? response.data.result : undefined;
    console.log("getToken:", result ? result.token : undefined);
    return response.data;
};

/**
 * Requests one page of the materials updated during the last 60 minutes.
 *
 * @param {number} page Page index.
 * @param {number} [size=500] Page size.
 * @returns {Promise<object>} Response body.
 */
CMD.get_novel_material_list = async function (page, size = 500) {
    const now = new Date();
    const windowStart = new Date(now - 60 * 60 * 1000);
    const data = {
        startTime: windowStart.getTime(),
        endTime: now.getTime()
    };
    const params = `page=${page}&size=${size}&sort=update_time,desc`;
    const url = config.pull_data_config.get_novel_material_list + params;
    console.log(url, data);
    const response = await session.post(url, data, {
        headers: {
            'token': await redis_help.getKeyValue("pull_data_token")
        }
    });
    return response.data;
};

/**
 * Requests the details of the given materials.
 *
 * @param {Array} materialId_list Material ids.
 * @returns {Promise<object>} Response body.
 */
CMD.get_detail = async function (materialId_list) {
    const data = {
        list: materialId_list
    };
    const response = await session.post(config.pull_data_config.get_detail, data, {
        headers: {
            'token': await redis_help.getKeyValue("pull_data_token")
        }
    });
    return response.data;
};

/**
 * Creates the HTTP session and, once Redis is connected, stores a fresh
 * token, restores today's record and starts the hourly scheduler.
 * The RabbitMQ consumer (startConsumer) is not started here; that call was
 * already disabled.
 */
CMD.init = async function () {
    session = axios.create({
        baseURL: config.isDebug ? config.pull_data_config.debug_host : config.pull_data_config.release_host,
        headers: {
            'Accept': 'application/json, text/plain, */*',
        }
    });
    redis_help.connect(async () => {
        // The callback's promise is not awaited by init, so failures are
        // handled here instead of becoming unhandled rejections.
        try {
            const response = await CMD.getToken();
            await redis_help.setKeyValue("pull_data_token", response.result.token);

            _24HourRanges = CMD.generate24HourRanges();

            const key = todayKey();
            const record_res = await manager.getRecord(key);
            if (record_res != null) {
                cur_day_data[key] = record_res.content;
            }

            CMD.startScheduler(runScheduledPull);
        } catch (error) {
            console.error('PULL_DATA: init failed:', error);
        }
    });
};

/**
 * Builds the 24 hourly slots of the current day.
 *
 * Every slot starts on the hour; the previous hard-coded table started the
 * 20:00 slot at '20:43' and carried timestamps of a fixed past date.
 *
 * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
 */
CMD.generate24HourRanges = function () {
    const ranges = [];
    for (let hour = 0; hour < 24; hour++) {
        const slotStart = new Date();
        slotStart.setHours(hour, 0, 0, 0);
        const timestamp = slotStart.getTime();
        const period = DAY_PERIODS.find((entry) => hour < entry.until);
        ranges.push({
            name: period.name,
            start: `${String(hour).padStart(2, '0')}:00`,
            timestamp: timestamp,
            hour: hour,
            timeFormat: new Date(timestamp).toLocaleString(), // human-readable form
        });
    }
    return ranges;
};

/** Cancels every scheduled job and clears the job list. */
CMD.stopScheduler = function () {
    jobs.forEach(({ job }) => job.cancel());
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
};

/**
 * Registers one daily job per entry of `_24HourRanges`, replacing any jobs
 * registered before.
 *
 * @param {function({timeRange: object, executionTime: Date}): Promise<void>} taskCallback
 *   Called whenever a slot is due; its errors are logged and not rethrown.
 */
CMD.startScheduler = function (taskCallback) {
    CMD.stopScheduler();

    _24HourRanges.forEach((timeRange) => {
        const [hourText, minuteText] = timeRange.start.split(':');
        const hour = Number.parseInt(hourText, 10);
        const minute = Number.parseInt(minuteText, 10);

        // Cron-style rule: every day at hour:minute.
        const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
            try {
                console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
                await taskCallback({
                    timeRange,
                    executionTime: new Date()
                });
            } catch (error) {
                console.error('任务执行错误:', error);
            }
        });
        if (!job) {
            // No job was created for this rule; do not store a null job that
            // stopScheduler() could not cancel.
            console.error('PULL_DATA: could not schedule job for', timeRange.start);
            return;
        }

        jobs.push({ job, timeRange });
        console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
};

CMD.init().catch((error) => {
    console.error('PULL_DATA: init failed:', error);
});