PULL_DATA.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. const CMD = {}
  2. const time_count = 5000;
  3. const redis_help = require('../src/use_redis');
  4. const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  5. const pull_log_controllers = require('../src/data_manager/Controllers/pull_log_controllers');
  6. const helper = require('../src/helper');
  7. const axios = require('axios')
  8. const HttpClient = require('../src/HttpClient')
  9. const config = require('../etc/config.json');
  10. const DailyRecordManager = require('./daily_records');
  11. const schedule = require('node-schedule');
  12. const manager = new DailyRecordManager();
  // Mutable module-level state shared by the functions in this file.
  let session = null;       // axios instance; assigned in CMD.init
  let cur_page = 0;         // page index for the next list request; processTask sets it to -1 to stop paging
  let task_list = [];       // not referenced anywhere in the visible part of this file
  let _24HourRanges = [];   // hourly slot descriptors; filled from CMD.generate24HourRanges in CMD.init
  let jobs = [];            // { job, timeRange } entries appended by CMD.startScheduler
  let cur_day_data = [];    // used as a dictionary: date string -> slot start ("HH:00") -> stats
  /**
   * Pulls one page (index `cur_page`) of the material list and syncs it into
   * the origin-data store, then re-schedules itself for the next page.
   *
   * Per page:
   *  1. Items already stored get their counters refreshed.
   *  2. Unknown items that pass `CMD.isRight` are collected, their details are
   *     fetched in one `CMD.get_detail` call, and those flagged
   *     `hasAnchorInfo` are inserted.
   *
   * Paging stops (cur_page = -1) once the list response carries no items.
   * Any other failure is logged and the next page is still attempted after
   * `time_count` milliseconds.
   *
   * @param {{start: string}|null|undefined} timeRange Hourly slot that
   *   triggered this run. When given, the list's `count` is stored under
   *   today's record for that slot. Re-scheduled runs are invoked without
   *   arguments, so the count is only recorded for the first page of a run.
   * @returns {Promise<void>}
   */
  async function processTask(timeRange) {
    const NO_MORE_DATA = "没有数据了";

    // Extracts the four engagement counters persisted for a list item.
    const readCounters = (item) => ({
      kepp_num: item.favoriteCount.count,
      comment_num: item.commentCount.count,
      like_num: item.likeCount.count,
      shared_num: item.shareCount.count
    });

    try {
      if (cur_page === -1) {
        console.log("PULL_DATA: error:", NO_MORE_DATA);
        return;
      }

      const FilterConfig = JSON.parse(await redis_help.getKeyValue("FilterConfig"));
      const listResponse = await CMD.get_novel_material_list(cur_page);

      // A response without `result` (not just without `result.list`) is also
      // treated as "no more data"; otherwise the TypeError would be caught
      // below and paging would continue forever.
      const result = listResponse ? listResponse.result : undefined;
      const pageItems = result ? result.list : undefined;
      if (!Array.isArray(pageItems) || pageItems.length <= 0) {
        cur_page = -1;
        console.log("PULL_DATA: error:", NO_MORE_DATA);
        return;
      }

      if (timeRange != null) {
        const today = new Date().toISOString().split('T')[0];
        cur_day_data[today][timeRange.start] = { count: result.count };
        // Persist today's record (time ranges carry no `today` field).
        await manager.updateRecord(today, cur_day_data[today]);
      }

      const newMaterialIds = [];
      // Keys are stringified so list and detail ids match even if one side
      // is numeric and the other a string.
      const countersById = new Map();

      for (const item of pageItems) {
        const stored = await origin_data_controllers.getOriginData({
          materialId: item.materialId
        });
        if (stored.success) {
          await origin_data_controllers.updateOriginData(
            { id: stored.data.id },
            readCounters(item)
          );
        } else if (CMD.isRight(FilterConfig, item)) {
          newMaterialIds.push(item.materialId);
          countersById.set(String(item.materialId), readCounters(item));
        }
      }

      // Nothing new on this page: skip the detail request entirely.
      if (newMaterialIds.length === 0) {
        return;
      }

      const detailResponse = await CMD.get_detail(newMaterialIds);
      for (const element of detailResponse.result.list) {
        const info = countersById.get(String(element.materialId));
        // Skip items without anchor info and details we never asked for.
        if (!element.hasAnchorInfo || !info) {
          continue;
        }
        await origin_data_controllers.createOriginData({
          video_id: element.awemeId,
          materialId: element.materialId,
          video_link: element.awemeUrl,
          title: element.title,
          publish_time: new Date(element.publishTime),
          kepp_num: info.kepp_num,
          comment_num: info.comment_num,
          like_num: info.like_num,
          shared_num: info.shared_num,
          is_guajian: 1,
          guajian_link: element.landingUrl,
          status: 0
        });
      }
    } catch (e) {
      console.log("PULL_DATA: error:", e);
    } finally {
      // Runs after every exit path above, including the early returns.
      if (cur_page !== -1) {
        cur_page++;
        global.setTimeout(processTask, time_count);
      }
    }
  }
  /**
   * Decides whether a list item meets every minimum configured in the filter.
   *
   * The item is rejected as soon as one of its counters is strictly below the
   * matching threshold; counters are checked in the order likes, favorites,
   * shares, comments.
   *
   * @param {object} FilterConfig Thresholds: like_num, kepp_num, shared_num, comment_num.
   * @param {object} origin_element Item exposing likeCount, favoriteCount,
   *   shareCount and commentCount, each with a `count` field.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function(FilterConfig, origin_element) {
    // [counter field on the item, threshold field on the filter]
    const checks = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num'],
    ];
    const belowThreshold = checks.some(
      ([counterKey, limitKey]) => origin_element[counterKey].count < FilterConfig[limitKey]
    );
    return !belowThreshold;
  }
  /**
   * Logs in against the configured token endpoint with the credentials from
   * `config.pull_data_config`.
   *
   * @returns {Promise<*>} The body (`data`) of the login response.
   */
  CMD.getToken = async function() {
    const settings = config.pull_data_config;
    const credentials = {
      userName: settings.userName,
      verCode: "1234",
      password: settings.password,
      loginType: "OPENAPI"
    };
    const loginResponse = await session.post(settings.get_token, credentials);
    return loginResponse.data;
  }
  /**
   * Requests one page of the material list for the 60 minutes ending now,
   * sorted by update time (newest first).
   *
   * @param {number} page Page index to request.
   * @param {number} [size=500] Page size.
   * @returns {Promise<*>} The body (`data`) of the list response.
   */
  CMD.get_novel_material_list = async function(page, size = 500) {
    const WINDOW_MS = 60 * 60 * 1000; // look-back window: 60 minutes
    const endTime = Date.now();
    const data = {
      startTime: endTime - WINDOW_MS,
      endTime: endTime
    };
    // The configured endpoint is concatenated directly with the query string.
    const url = `${config.pull_data_config.get_novel_material_list}page=${page}&size=${size}&sort=update_time,desc`;
    console.log(url, data);
    const token = await redis_help.getKeyValue("pull_data_token");
    const listResponse = await session.post(url, data, { headers: { 'token': token } });
    return listResponse.data;
  }
  /**
   * Fetches detail records for the given material ids, authenticating with
   * the token stored under "pull_data_token".
   *
   * @param {Array} materialId_list Material ids to look up.
   * @returns {Promise<*>} The body (`data`) of the detail response.
   */
  CMD.get_detail = async function(materialId_list) {
    const token = await redis_help.getKeyValue("pull_data_token");
    const detailResponse = await session.post(
      config.pull_data_config.get_detail,
      { list: materialId_list },
      { headers: { 'token': token } }
    );
    return detailResponse.data;
  }
  /**
   * Bootstraps the puller.
   *
   * Creates the shared axios session (debug or release host, depending on
   * `config.isDebug`) and, once `redis_help.connect` invokes its callback:
   * fetches a token, stores it under "pull_data_token", builds the hourly
   * slots and registers one scheduled job per slot.
   *
   * Each scheduled run drops in-memory data of other days, ensures today's
   * record has an entry for the slot, creates (or, if creation is rejected,
   * reloads) today's record via `manager`, resets paging and starts
   * `processTask`.
   *
   * @returns {Promise<void>}
   */
  CMD.init = async function() {
    const hosts = config.pull_data_config;
    session = axios.create({
      baseURL: config.isDebug ? hosts.debug_host : hosts.release_host,
      headers: {
        'Accept': 'application/json, text/plain, */*',
      }
    });

    redis_help.connect(async () => {
      const response = await CMD.getToken();
      console.log("getToken:", response.result.token);
      await redis_help.setKeyValue("pull_data_token", response.result.token);
      _24HourRanges = CMD.generate24HourRanges();

      // Work performed each time an hourly slot fires.
      const taskCallback = async (context) => {
        const { timeRange, executionTime } = context;
        console.log('执行任务:', {
          时间区间: timeRange.name,
          开始时间: timeRange.start,
          执行时间: executionTime
        });

        const today = new Date().toISOString().split('T')[0];

        // Drop data of previous days so the store does not grow without bound.
        for (const day of Object.keys(cur_day_data)) {
          if (day !== today) {
            delete cur_day_data[day];
          }
        }

        // Day records are keyed by slot start ("HH:00"), so they are plain
        // objects: string keys on an array are dropped by JSON.stringify.
        // NOTE(review): manager's storage format is not visible here — confirm.
        if (!cur_day_data[today]) {
          cur_day_data[today] = {};
        }
        if (!cur_day_data[today][timeRange.start]) {
          cur_day_data[today][timeRange.start] = {};
        }

        const result = await manager.createRecord(today, cur_day_data[today]);
        if (!result.success) {
          // Creation was rejected: continue from whatever getRecord returns,
          // falling back to an empty record if it returns nothing.
          const stored = await manager.getRecord(today);
          cur_day_data[today] = stored || {};
          cur_day_data[today][timeRange.start] = {};
        }

        cur_page = 0;
        await processTask(timeRange);
      };

      // Start the scheduler.
      CMD.startScheduler(taskCallback);
    });
  }
  /**
   * Builds the 24 hourly slots of the current local day.
   *
   * @returns {Array<{name: string, start: string, timestamp: number, hour: number, timeFormat: string}>}
   *   One entry per hour 0-23: period label, "HH:00" start string, epoch
   *   milliseconds of that hour today, the hour itself and a locale-formatted
   *   rendering of the timestamp.
   */
  CMD.generate24HourRanges = function() {
    // [exclusive upper hour bound, period label], in ascending order.
    const periods = [
      [6, '凌晨区间'],
      [9, '早晨区间'],
      [12, '上午区间'],
      [14, '中午区间'],
      [18, '下午区间'],
      [22, '晚上区间'],
    ];
    const lateNightLabel = '深夜区间'; // hours 22 and 23

    // One Date object is reused; only its hour changes per slot.
    const cursor = new Date();
    cursor.setMinutes(0);
    cursor.setSeconds(0);
    cursor.setMilliseconds(0);

    const ranges = Array.from({ length: 24 }, (_, hour) => {
      cursor.setHours(hour);
      const timestamp = cursor.getTime();
      const period = periods.find(([upperBound]) => hour < upperBound);
      return {
        name: period ? period[1] : lateNightLabel,
        start: `${String(hour).padStart(2, '0')}:00`,
        timestamp: timestamp,
        hour: hour,
        timeFormat: new Date(timestamp).toLocaleString() // human-readable form
      };
    });

    console.log("ranges:", ranges);
    return ranges;
  }
  /**
   * Registers one daily job per entry of `_24HourRanges`, firing at minute 0
   * of the slot's hour, and records each job in `jobs`.
   *
   * @param {function({timeRange: object, executionTime: Date}): Promise<*>} taskCallback
   *   Invoked on every firing; errors it throws are logged, not propagated.
   */
  CMD.startScheduler = function(taskCallback) {
    for (const timeRange of _24HourRanges) {
      // The hour is taken from the "HH:00" start string.
      const [hourText] = timeRange.start.split(':');
      const hour = parseInt(hourText);

      const runSlot = async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({ timeRange, executionTime: new Date() });
        } catch (error) {
          // A failing run must not break later runs.
          console.error('任务执行错误:', error);
        }
      };

      // Cron expression: minute 0 of the given hour, every day.
      const job = schedule.scheduleJob(`0 ${hour} * * *`, runSlot);
      jobs.push({ job, timeRange });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    }
    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  }
  // Start the puller on module load. The returned promise is handled so a
  // bootstrap failure is logged and reflected in the exit code instead of
  // being left as a floating rejection.
  CMD.init().catch((error) => {
    console.error("PULL_DATA: init failed:", error);
    process.exitCode = 1;
  });