// PULL_DATA_NEW.js
  // Namespace object; the scheduler and filter helpers below are attached to it.
  const CMD = {};

  // Project modules and third-party dependencies.
  const redis_help = require('../src/use_redis');
  const origin_data_controllers = require('../src/data_manager/Controllers/origin_data_controllers');
  const config = require('../etc/config.json');
  const DailyRecordManager = require('./daily_records');
  const schedule = require('node-schedule');

  // Daily record store used to persist the per-day statistics, initialised with "daily_records2".
  const manager = new DailyRecordManager();
  manager.init("daily_records2");

  const rabbitMq = require('../src/mq/rabbit-mq');
  const helper = require('../src/helper');
  const PullDataServiceNew = require('../src/PullDataServiceNew');

  // ---- Mutable module state ----

  // Time ranges parsed from the "PullDataConfig2" Redis value; one scheduled job is created per entry.
  var _24HourRanges = [];
  // Currently scheduled jobs, stored as { job, timeRange } pairs.
  var jobs = [];
  // Per-day statistics: Map of local date -> object keyed by a time range's `start`.
  var cur_day_data = new Map();
  // Time range of the most recently triggered task (null until a task has run).
  var cur_timeRange = null;
  // Service used by processTask to fetch material pages.
  var pullDataService = new PullDataServiceNew(redis_help);
  // Paging cursor handed to the pull service; reset to 0 per task, -1 stops the paging loop.
  var cur_page = 0;
  /**
   * Handler for pull-configuration change notifications.
   *
   * Re-reads the "PullDataConfig2" value from Redis and, when it parses to an
   * array, stores it in `_24HourRanges` and rebuilds the schedule through
   * CMD.startScheduler. A missing, malformed or non-array value is logged and
   * ignored, so the schedule that is already running stays untouched instead of
   * being cancelled by a half-applied reload.
   *
   * @param {*} msg - Incoming message; its content is not inspected.
   * @returns {Promise<void>} Never rejects; failures are logged.
   */
  const messageHandler = async (msg) => {
    try {
      const rawConfig = await redis_help.getKeyValue("PullDataConfig2");
      const parsedConfig = JSON.parse(rawConfig);
      if (!Array.isArray(parsedConfig)) {
        console.error('PullDataConfig2 is not an array, keeping the current schedule:', parsedConfig);
        return;
      }
      _24HourRanges = parsedConfig;
      CMD.startScheduler(taskCallback);
    } catch (error) {
      // Covers Redis failures and invalid JSON alike.
      console.error('Failed to reload PullDataConfig2:', error);
    }
  };
  /**
   * Scheduler callback: prepares the statistics for one time range and starts
   * pulling its data.
   *
   * Steps:
   *  1. Removes cached statistics of every day other than today from
   *     `cur_day_data` (previously they were only blanked to `{}`, so the Map
   *     kept one key per elapsed day).
   *  2. Makes sure today's entry has a slot for `timeRange.start`.
   *  3. Persists today's statistics (update if a record exists, create otherwise).
   *  4. Publishes the range as `cur_timeRange`, resets the counters/cursor and
   *     runs the first `processTask` page.
   *
   * NOTE(review): `timeRange` is mutated (`today`, `finish_count`) and shared
   * through module state; if an earlier processTask loop is still rescheduling
   * itself it will continue with the state set here - confirm ranges cannot
   * overlap.
   *
   * @param {{timeRange: Object, executionTime: Date}} context - Range that fired
   *   and the time the job started.
   * @returns {Promise<void>}
   */
  const taskCallback = async (context) => {
    const { timeRange, executionTime } = context;
    console.log('执行任务:', {
      时间区间: timeRange.name,
      开始时间: timeRange.start,
      执行时间: executionTime
    });

    const today = helper.getLocalDate();

    // Snapshot the keys first so entries can be deleted while walking them.
    for (const day of Array.from(cur_day_data.keys())) {
      if (day !== today) {
        cur_day_data.delete(day);
      }
    }
    if (!cur_day_data.has(today)) {
      cur_day_data.set(today, {});
    }

    const todayData = cur_day_data.get(today);
    if (!todayData[timeRange.start]) {
      todayData[timeRange.start] = {
        name: timeRange.name,
        interval_minute: timeRange.interval_minute,
        finish_count: 0
      };
    }
    console.log("cur_day_data[today]:", todayData, today);

    const existingRecord = await manager.getRecord(today);
    if (existingRecord) {
      // A record for today already exists: update it.
      const result = await manager.updateRecord(today, todayData);
      console.log("更新记录结果:", result);
    } else {
      // No record for today yet: create one.
      const result = await manager.createRecord(today, todayData);
      console.log("创建新记录结果:", result);
    }

    cur_timeRange = timeRange;
    cur_timeRange.today = today;
    cur_timeRange.finish_count = 0;
    cur_page = 0;
    await processTask();
  };
  /**
   * Starts the configuration-update consumer.
   *
   * Registers `messageHandler` through rabbitMq.consumerDirectMsg for the
   * "exchange_update_pull_config2" exchange and the "updatePullConig2" routing
   * key. A failure is logged and swallowed; nothing is rethrown.
   *
   * @returns {Promise<void>}
   */
  async function startConsumer() {
    const exchangeName = "exchange_update_pull_config2";
    const routingKey = "updatePullConig2";
    try {
      await rabbitMq.consumerDirectMsg(messageHandler, exchangeName, routingKey);
    } catch (subscribeError) {
      console.error('启动消费者失败:', subscribeError);
    }
  }
  /**
   * Finds the first entry of `list` whose `materialId` matches.
   *
   * The comparison deliberately stays loose (`==`) so that an id delivered as a
   * number still matches the same id delivered as a string.
   *
   * @param {Array<Object>} list - Candidate entries; a non-array yields null.
   * @param {string|number} materialId - Id to look for.
   * @returns {Object|null} The matching entry, or null when there is none.
   */
  function get_detail_by_materialId(list, materialId) {
    if (!Array.isArray(list)) {
      return null;
    }
    // Null/undefined entries are skipped instead of raising a TypeError.
    const match = list.find((entry) => entry != null && entry.materialId == materialId);
    return match === undefined ? null : match;
  }
  /**
   * Joins detail records with their statistics and publishes the result.
   *
   * For every entry of `material_list` that has a matching detail record in
   * `list` (looked up by `materialId`), a flat record with the detail fields and
   * the four counters is built. All records are sent in a single
   * "on_recv_pull_data" message together with the current task's `today`.
   *
   * Assumes every matched `material_list` entry carries `favoriteCount`,
   * `commentCount`, `likeCount` and `shareCount` objects.
   *
   * @param {Array<Object>} list - Detail records.
   * @param {Array<Object>} material_list - Statistic records; a non-array is
   *   treated as empty and null entries are skipped.
   * @returns {void}
   */
  function generate_pull_time_data(list, material_list) {
    const materials = Array.isArray(material_list) ? material_list : [];
    const pull_data = [];

    for (const element of materials) {
      if (element == null) {
        continue;
      }
      const info = get_detail_by_materialId(list, element.materialId);
      if (info == null) {
        continue;
      }
      pull_data.push({
        awemeId: info.awemeId,
        materialId: info.materialId,
        awemeUrl: info.awemeUrl,
        title: info.title,
        publishTime: info.publishTime,
        hasAnchorInfo: info.hasAnchorInfo,
        landingUrl: info.landingUrl,
        createTime: info.createTime,
        updateTime: info.updateTime,
        favoriteCount: element.favoriteCount.count,
        commentCount: element.commentCount.count,
        likeCount: element.likeCount.count,
        shareCount: element.shareCount.count
      });
    }

    // cur_timeRange is null until the first task has run; do not crash on it.
    const today = cur_timeRange != null ? cur_timeRange.today : undefined;
    sendPullDataMQMessage(JSON.stringify({ cmd: "on_recv_pull_data", data: pull_data, today: today }));
  }
  /**
   * Validates one page of a material-list response, records paging statistics
   * for the current time range and works out which material ids are not stored
   * yet.
   *
   * Changes compared with the previous version:
   *  - the unused "FilterConfig" Redis read was removed (its failure used to
   *    make the whole call return null);
   *  - `cur_timeRange.count` starts from 0 instead of turning into NaN;
   *  - the statistics write is best-effort: a rejected `updateRecord` is logged
   *    rather than left as an unhandled rejection;
   *  - expected "nothing to do" cases return early instead of throwing plain
   *    values.
   *
   * @param {Object} response - Service response; expected to carry `success`
   *   and `data.list` (plus `data.count`).
   * @param {*} page - Page/cursor value stored in the statistics.
   * @returns {Promise<Object|null>} `{list, id_list, materialId_list,
   *   materialId_data_list, count}` or null when the response is unusable or
   *   any step fails.
   */
  async function process_material_list(response, page) {
    // Same log line the catch block has always produced for rejected input.
    const giveUp = (reason) => {
      console.log("materialId_list:", reason);
      return null;
    };

    try {
      if (!response.success || response.data == null) {
        return giveUp({ msg: response, timeRange: cur_timeRange, fun: "get_novel_material_list" });
      }
      const pageItems = response.data.list;
      if (!Array.isArray(pageItems) || pageItems.length <= 0) {
        return giveUp("没有数据了");
      }

      if (cur_timeRange != null) { // statistics
        const today = cur_timeRange.today;
        const dayData = cur_day_data.get(today);
        const slot = dayData[cur_timeRange.start];
        if (slot) {
          console.log("response.data.count:", response.data.count);
          console.log("(cur_day_data[today][timeRange.start]:", slot);
          cur_timeRange.count = (cur_timeRange.count || 0) + pageItems.length;
          cur_timeRange.cur_page = page;
          slot.start_time = cur_timeRange.start_time;
          slot.pull_day = cur_timeRange.pull_day;
          slot.pull_time = cur_timeRange.pull_time || {};
          slot.count = response.data.count;
          slot.cur_page = page;
        } else {
          console.log("cur_day_data空");
        }
        // Fire-and-forget as before, but never leave the rejection unhandled.
        Promise.resolve(manager.updateRecord(today, dayData)).catch((error) => {
          console.error("process_material_list: updateRecord failed:", error);
        });
      }

      const materialId_list = [];
      // NOTE(review): an array used as an id-keyed lookup; kept as an array so
      // the returned shape does not change for existing callers.
      const materialId_data_list = [];
      for (const origin_element of pageItems) {
        materialId_list.push(origin_element.materialId);
        materialId_data_list[origin_element.materialId] = {
          kepp_num: origin_element.favoriteCount.count,
          comment_num: origin_element.commentCount.count,
          like_num: origin_element.likeCount.count,
          shared_num: origin_element.shareCount.count
        };
      }

      const result = await origin_data_controllers.findNonExistentMaterialIds(materialId_list);
      return {
        list: pageItems,
        id_list: materialId_list,
        materialId_list: result.data.nonExistentIds,
        materialId_data_list: materialId_data_list,
        count: pageItems.length
      };
    } catch (e) {
      return giveUp(e);
    }
  }
  /**
   * Pulls one page of materials for `cur_timeRange`, forwards it to the message
   * queue and reschedules itself one second later until the range is exhausted.
   *
   * Cursor handling (`cur_page`):
   *  - it is passed as the second argument to
   *    `pullDataService.get_novel_material_list` (0 for the first page);
   *  - after a page is fetched it becomes the last record's `updateTime` in
   *    milliseconds, and is advanced by 1 before the next run;
   *  - `-1` stops the loop: non-"success" answer, empty page, last record at or
   *    past `cur_timeRange.pull_time.endTime`, or an unparsable `updateTime`.
   *
   * Changes compared with the previous version:
   *  - the unused "FilterConfig" Redis read was removed; a failure there used to
   *    abort the page and still move the cursor;
   *  - the cursor only advances once a page produced a new cursor, so a failed
   *    fetch retries the same position instead of drifting forward;
   *  - an invalid `updateTime` stops the loop instead of spinning forever on a
   *    NaN cursor.
   *
   * The termination cases are still reported through the same
   * "PULL_DATA: error:" log line as before.
   *
   * NOTE(review): the loop reads module state (`cur_timeRange`, `cur_page`); a
   * new task starting while this loop is still active shares that state -
   * confirm time ranges cannot overlap.
   *
   * @returns {Promise<void>} Never rejects; failures are logged.
   */
  async function processTask() {
    // True once this run derived a new cursor from the fetched page.
    let cursorMoved = false;
    try {
      const response = await pullDataService.get_novel_material_list(cur_timeRange, cur_page, 500);
      if (response.msg != "success" || response.data.list.length <= 0) {
        cur_page = -1;
        throw response;
      }

      const pageItems = response.data.list;
      const lastUpdateMs = new Date(pageItems[pageItems.length - 1]["updateTime"]).getTime();
      if (Number.isNaN(lastUpdateMs)) {
        cur_page = -1;
        throw new Error("last record has an invalid updateTime, stopping the pull loop");
      }
      cur_page = lastUpdateMs;
      cursorMoved = true;

      const endMs = new Date(cur_timeRange.pull_time.endTime).getTime();
      console.log("endTime:", lastUpdateMs, endMs);
      if (lastUpdateMs >= endMs) {
        cur_page = -1;
        throw response;
      }

      cur_timeRange.finish_count += pageItems.length;
      const today = cur_timeRange.today;
      cur_day_data.get(today)[cur_timeRange.start].finish_count = cur_timeRange.finish_count;
      await manager.updateRecord(today, cur_day_data.get(today));

      sendPullDataMQMessage(JSON.stringify({ cmd: "on_recv_pull_data", data: pageItems, today: cur_timeRange.today }));
      console.log("processTask over!");
    } catch (e) {
      console.error("PULL_DATA: error:", e);
    } finally {
      if (cur_page != -1) {
        if (cursorMoved) {
          // Continue right after the last record that was seen.
          cur_page++;
        }
        global.setTimeout(processTask, 1000);
      }
    }
  }
  /**
   * Shortens a title to at most `maxLength` UTF-16 code units.
   *
   * Non-string values (including null/undefined) are returned unchanged
   * instead of raising a TypeError. When the cut would fall between the two
   * halves of a surrogate pair, the dangling high surrogate is dropped as well
   * so the result never ends in a broken character.
   *
   * @param {string} title - Text to shorten.
   * @param {number} [maxLength=299] - Maximum length of the result.
   * @returns {string} `title` itself when it already fits, otherwise its prefix.
   */
  CMD.subTitle = function(title, maxLength = 299) {
    if (typeof title !== 'string' || title.length <= maxLength) {
      return title;
    }
    let end = maxLength;
    const lastUnit = title.charCodeAt(end - 1);
    // 0xD800-0xDBFF is the high-surrogate range.
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) {
      end -= 1;
    }
    return title.substring(0, end);
  }
  /**
   * Tells whether a material reaches every minimum configured in the filter.
   *
   * Checked pairs (counter on the element -> threshold in the config):
   * likeCount -> like_num, favoriteCount -> kepp_num, shareCount -> shared_num,
   * commentCount -> comment_num.
   *
   * Values are compared numerically, so string counters/thresholds such as "9"
   * and "10" compare as numbers. A counter that is missing or not numeric counts
   * as 0 (it used to throw or silently pass); a threshold that is missing or not
   * numeric imposes no limit.
   *
   * @param {Object} FilterConfig - Thresholds; null/undefined means no limits.
   * @param {Object} origin_element - Material carrying `*Count.count` values.
   * @returns {boolean} true when no counter is below its threshold.
   */
  CMD.isRight = function(FilterConfig, origin_element) {
    const thresholds = FilterConfig || {};
    const element = origin_element || {};
    const rules = [
      ['likeCount', 'like_num'],
      ['favoriteCount', 'kepp_num'],
      ['shareCount', 'shared_num'],
      ['commentCount', 'comment_num']
    ];
    return rules.every(([counterKey, thresholdKey]) => {
      const counter = element[counterKey];
      const value = Number(counter && counter.count) || 0;
      // A NaN threshold makes the comparison false, i.e. "no limit".
      return !(value < Number(thresholds[thresholdKey]));
    });
  }
  /**
   * Boots the module once Redis is connected.
   *
   * Inside the `redis_help.connect` callback it:
   *  1. starts the configuration-update consumer (not awaited);
   *  2. loads "PullDataConfig2" into `_24HourRanges` - a missing, malformed or
   *     non-array value is logged and replaced by an empty list, so the
   *     scheduler simply starts without jobs;
   *  3. seeds `cur_day_data` with today's stored record, if one with content
   *     exists and today is not cached yet;
   *  4. starts the scheduler with `taskCallback`.
   *
   * Every failure inside the callback is logged; previously it surfaced as an
   * unhandled promise rejection.
   *
   * @returns {Promise<void>} Resolves as soon as `connect` has been called; it
   *   does not wait for the callback to finish.
   */
  CMD.init = async function() {
    redis_help.connect(async () => {
      try {
        startConsumer();

        let ranges = [];
        try {
          const parsedConfig = JSON.parse(await redis_help.getKeyValue("PullDataConfig2"));
          if (Array.isArray(parsedConfig)) {
            ranges = parsedConfig;
          } else {
            console.error('PullDataConfig2 is not an array, starting without time ranges:', parsedConfig);
          }
        } catch (configError) {
          console.error('Failed to load PullDataConfig2, starting without time ranges:', configError);
        }
        _24HourRanges = ranges;

        // Restore today's statistics so a restart does not lose earlier slots.
        const key = helper.getLocalDate();
        const record_res = await manager.getRecord(key);
        if (record_res != null && record_res.content != null && !cur_day_data.has(key)) {
          cur_day_data.set(key, record_res.content);
        }

        // Start the scheduler.
        CMD.startScheduler(taskCallback);
      } catch (error) {
        console.error('CMD.init failed:', error);
      }
    })
  }
  /**
   * Placeholder for generating the 24-hour time ranges.
   * It currently builds nothing and hands back a fresh empty object on every call.
   *
   * @returns {Object} A new empty object.
   */
  CMD.generate24HourRanges = function() {
    const ranges = {};
    return ranges;
  }
  /**
   * Cancels every scheduled job and empties the `jobs` list.
   *
   * Entries without a job object are skipped: `schedule.scheduleJob` can hand
   * back null for a spec it cannot schedule, and calling `cancel()` on that
   * would throw before `jobs` is reset, leaving the scheduler unable to restart.
   *
   * @returns {void}
   */
  CMD.stopScheduler = function() {
    jobs.forEach((entry) => {
      if (entry && entry.job) {
        entry.job.cancel();
      }
    });
    jobs = [];
    console.log('调度器已停止,所有任务已清除');
  }
  /**
   * Rebuilds the daily schedule from `_24HourRanges`.
   *
   * All existing jobs are stopped first. For every range whose `start` is a
   * valid "HH:MM" time, a job is registered with the cron spec
   * `<minute> <hour> * * *`; when it fires, `taskCallback` receives
   * `{ timeRange, executionTime }` and any error it raises is logged.
   *
   * Hardening compared with the previous version:
   *  - a null or non-array `_24HourRanges` no longer throws, it just leaves the
   *    scheduler empty;
   *  - hours/minutes are parsed with radix 10 and range-checked, and ranges with
   *    an unusable `start` are skipped with a log line instead of producing a
   *    "NaN NaN * * *" spec;
   *  - a job the scheduling library refuses to create is not stored.
   *
   * @param {function({timeRange: Object, executionTime: Date}): Promise<*>} taskCallback
   *   Callback run each time a range fires.
   * @returns {void}
   */
  CMD.startScheduler = function(taskCallback) {
    CMD.stopScheduler();

    console.error("_24HourRanges:", _24HourRanges);
    if (!Array.isArray(_24HourRanges) || _24HourRanges.length <= 0) {
      return;
    }

    // Create one timed job per configured range.
    _24HourRanges.forEach((timeRange) => {
      const hasStart = timeRange != null && typeof timeRange.start === 'string';
      const parts = hasStart ? timeRange.start.split(':') : [];
      const hour = parseInt(parts[0], 10);
      const minute = parseInt(parts[1], 10);
      // NaN fails every comparison, so unparsable parts are rejected here too.
      const isValidTime = hour >= 0 && hour <= 23 && minute >= 0 && minute <= 59;
      if (!isValidTime) {
        console.error('Skipping time range with an invalid start time:', timeRange);
        return;
      }

      // Runs every day at the given hour and minute.
      const job = schedule.scheduleJob(`${minute} ${hour} * * *`, async () => {
        try {
          console.log(`开始执行任务: ${timeRange.name} ${timeRange.start}`);
          await taskCallback({
            timeRange,
            executionTime: new Date()
          });
        } catch (error) {
          console.error('任务执行错误:', error);
        }
      });
      if (!job) {
        console.error('Scheduler rejected time range:', timeRange);
        return;
      }

      jobs.push({
        job,
        timeRange
      });
      console.log(`已安排任务: ${timeRange.name} ${timeRange.start}`);
    });

    console.log(`调度器已启动,共设置 ${jobs.length} 个定时任务`);
  }
  /**
   * Publishes a message on the "exchange_pull_data_system_new" exchange.
   *
   * With a routing key (the default is "on_recv_pull_data") the key is passed
   * to rabbitMq.producerDirectMsg as third argument; with an explicit null key
   * the call is made with message and exchange only. Success and failure are
   * both just logged - nothing is returned or rethrown.
   *
   * @param {string} message - Payload to publish.
   * @param {?string} [routingKey="on_recv_pull_data"] - Routing key, or null.
   * @returns {Promise<void>}
   */
  async function sendPullDataMQMessage(message, routingKey = "on_recv_pull_data") {
    const exchangeName = "exchange_pull_data_system_new";
    try {
      const publishing = routingKey != null
        ? rabbitMq.producerDirectMsg(message, exchangeName, routingKey)
        : rabbitMq.producerDirectMsg(message, exchangeName);
      await publishing;
      console.log('消息发送成功');
    } catch (publishError) {
      console.error('发送消息失败:', publishError);
    }
  }
  // Bootstrap on module load. The former `config.isDebug` guard is disabled, so
  // this always runs. The rejection handler keeps a failing start-up (for
  // example `redis_help.connect` throwing) from becoming an unhandled rejection.
  CMD.init().catch((error) => {
    console.error('CMD.init failed:', error);
  });