geetest_crack.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. import os
  2. import time
  3. import logging
  4. import signal
  5. import subprocess
  6. import cv2
  7. import numpy as np
  8. from PIL import Image
  9. import io
  10. import base64
  11. import random
  12. from selenium import webdriver
  13. from selenium.webdriver.firefox.options import Options
  14. from selenium.webdriver.firefox.service import Service
  15. from selenium.webdriver.support.ui import WebDriverWait
  16. from selenium.webdriver.support import expected_conditions as EC
  17. from selenium.webdriver.common.by import By
  18. from selenium.webdriver.common.action_chains import ActionChains
  19. from contextlib import contextmanager
  20. # 配置日志
  21. logging.basicConfig(
  22. level=logging.INFO,
  23. format='%(asctime)s - %(levelname)s - %(message)s'
  24. )
  25. logger = logging.getLogger(__name__)
  26. class TimeoutException(Exception):
  27. pass
  28. @contextmanager
  29. def time_limit(seconds):
  30. def signal_handler(signum, frame):
  31. raise TimeoutException("Timed out!")
  32. signal.signal(signal.SIGALRM, signal_handler)
  33. signal.alarm(seconds)
  34. try:
  35. yield
  36. finally:
  37. signal.alarm(0)
  38. class GeetestCracker:
  39. def __init__(self):
  40. self.driver = None
  41. self.wait = None
  42. logger.info("初始化 GeetestCracker")
  43. def cleanup_processes(self):
  44. """清理残留进程"""
  45. try:
  46. subprocess.run(['pkill', '-f', 'firefox'], stderr=subprocess.DEVNULL)
  47. subprocess.run(['pkill', '-f', 'geckodriver'], stderr=subprocess.DEVNULL)
  48. time.sleep(2)
  49. logger.info("清理残留进程完成")
  50. except Exception as e:
  51. logger.error(f"清理进程时出错: {str(e)}")
  52. def setup_driver(self):
  53. """配置并初始化 WebDriver"""
  54. try:
  55. logger.info("开始设置浏览器驱动")
  56. # 首先清理可能的残留进程
  57. self.cleanup_processes()
  58. # 检查 geckodriver
  59. from shutil import which
  60. geckodriver_path = which('geckodriver')
  61. if not geckodriver_path:
  62. logger.error("未找到 geckodriver")
  63. return False
  64. logger.info(f"找到 geckodriver: {geckodriver_path}")
  65. # Firefox 配置 - 修改配置以提高稳定性
  66. options = Options()
  67. # 核心配置
  68. options.set_preference('marionette', True)
  69. options.set_preference('marionette.port', 2828) # 固定端口
  70. options.set_preference('network.http.connection-timeout', 10000)
  71. options.set_preference('network.http.response.timeout', 10000)
  72. # 禁用 JavaScript JIT
  73. options.set_preference('javascript.options.ion', False)
  74. options.set_preference('javascript.options.baselinejit', False)
  75. # 禁用硬件加速
  76. options.set_preference('layers.acceleration.disabled', True)
  77. # 禁用不必要的功能
  78. options.set_preference('browser.cache.disk.enable', False)
  79. options.set_preference('browser.cache.memory.enable', False)
  80. options.set_preference('browser.cache.offline.enable', False)
  81. options.set_preference('network.http.use-cache', False)
  82. options.set_preference('browser.tabs.remote.autostart', False)
  83. options.set_preference('browser.tabs.remote.autostart.2', False)
  84. options.set_preference('dom.ipc.processCount', 1)
  85. options.set_preference('browser.sessionstore.resume_from_crash', False)
  86. # 添加必要的参数
  87. options.add_argument('--headless')
  88. options.add_argument('--no-sandbox')
  89. options.add_argument('--disable-dev-shm-usage')
  90. options.add_argument('--disable-gpu')
  91. options.add_argument('--disable-extensions')
  92. options.add_argument('--disable-infobars')
  93. options.add_argument('--disable-notifications')
  94. options.add_argument('--window-size=1280,800')
  95. logger.info("创建 WebDriver 实例")
  96. # 使用临时目录
  97. import tempfile
  98. temp_dir = tempfile.mkdtemp()
  99. # Firefox 临时配置目录
  100. options.set_preference('profile', temp_dir)
  101. # 创建 Service 对象,添加更多日志选项
  102. service = Service(
  103. geckodriver_path,
  104. log_output=os.path.join(temp_dir, 'geckodriver.log'),
  105. service_args=[
  106. '--log', 'trace',
  107. '--marionette-port', '2828'
  108. ]
  109. )
  110. # 设置超时限制并重试
  111. max_attempts = 3
  112. for attempt in range(max_attempts):
  113. try:
  114. logger.info(f"尝试创建 WebDriver 实例 (尝试 {attempt + 1}/{max_attempts})")
  115. # 设置环境变量
  116. os.environ['MOZ_HEADLESS'] = '1'
  117. os.environ['DISPLAY'] = ':99'
  118. # 创建虚拟显示
  119. try:
  120. subprocess.run(['Xvfb', ':99', '-screen', '0', '1280x800x24'],
  121. start_new_session=True,
  122. stdout=subprocess.DEVNULL,
  123. stderr=subprocess.DEVNULL)
  124. except:
  125. logger.warning("Xvfb 启动失败,继续尝试")
  126. # 设置严格的超时
  127. with time_limit(20): # 减少超时时间
  128. self.driver = webdriver.Firefox(
  129. service=service,
  130. options=options
  131. )
  132. # 设置页面加载超时
  133. self.driver.set_page_load_timeout(10)
  134. self.driver.set_script_timeout(10)
  135. self.wait = WebDriverWait(self.driver, 10)
  136. # 测试连接
  137. logger.info("测试浏览器连接")
  138. self.driver.get('about:blank')
  139. logger.info("浏览器驱动初始化成功")
  140. return True
  141. except TimeoutException:
  142. logger.error(f"第 {attempt + 1} 次尝试超时")
  143. except Exception as e:
  144. logger.error(f"第 {attempt + 1} 次尝试失败: {str(e)}")
  145. # 清理资源
  146. self.cleanup_driver()
  147. self.cleanup_processes()
  148. if attempt < max_attempts - 1:
  149. logger.info("等待后重试...")
  150. time.sleep(5)
  151. else:
  152. raise Exception("在多次尝试后仍然失败")
  153. except Exception as e:
  154. logger.error(f"浏览器驱动初始化失败: {str(e)}")
  155. self.cleanup_driver()
  156. return False
  157. finally:
  158. # 清理临时目录
  159. try:
  160. import shutil
  161. shutil.rmtree(temp_dir, ignore_errors=True)
  162. except:
  163. pass
  164. # 停止虚拟显示
  165. try:
  166. subprocess.run(['pkill', 'Xvfb'],
  167. stdout=subprocess.DEVNULL,
  168. stderr=subprocess.DEVNULL)
  169. except:
  170. pass
  171. def cleanup_processes(self):
  172. """更彻底地清理残留进程"""
  173. try:
  174. # 使用 pkill 清理进程
  175. commands = [
  176. ['pkill', '-f', 'firefox'],
  177. ['pkill', '-f', 'geckodriver'],
  178. ['pkill', '-f', 'Xvfb'],
  179. ['killall', 'firefox'],
  180. ['killall', 'geckodriver'],
  181. ['killall', 'Xvfb']
  182. ]
  183. for cmd in commands:
  184. try:
  185. subprocess.run(cmd,
  186. stdout=subprocess.DEVNULL,
  187. stderr=subprocess.DEVNULL)
  188. except:
  189. continue
  190. # 使用 ps 查找并强制终止进程
  191. try:
  192. ps_output = subprocess.check_output(['ps', 'aux']).decode()
  193. for line in ps_output.split('\n'):
  194. if 'firefox' in line or 'geckodriver' in line or 'Xvfb' in line:
  195. try:
  196. pid = int(line.split()[1])
  197. os.kill(pid, signal.SIGKILL)
  198. except:
  199. continue
  200. except:
  201. pass
  202. time.sleep(2)
  203. logger.info("清理残留进程完成")
  204. except Exception as e:
  205. logger.error(f"清理进程时出错: {str(e)}")
  206. def navigate_to_page(self):
  207. """导航到目标页面"""
  208. try:
  209. logger.info("正在访问目标网页")
  210. max_attempts = 3
  211. for attempt in range(max_attempts):
  212. try:
  213. logger.info(f"尝试访问页面 (尝试 {attempt + 1}/{max_attempts})")
  214. # 增加页面加载超时时间
  215. self.driver.set_page_load_timeout(30)
  216. # 直接访问目标验证码页面
  217. target_url = 'https://open.yuewen.com/'
  218. logger.info(f"访问目标URL: {target_url}")
  219. self.driver.get(target_url)
  220. # 等待页面加载完成
  221. logger.info("等待页面加载")
  222. WebDriverWait(self.driver, 30).until(
  223. lambda driver: driver.execute_script("return document.readyState") == "complete"
  224. )
  225. # 检查页面是否正确加载
  226. try:
  227. # 验证是否存在验证码相关元素
  228. slide_button = WebDriverWait(self.driver, 10).until(
  229. EC.presence_of_element_located((By.CLASS_NAME, "geetest_slider_button"))
  230. )
  231. if not slide_button:
  232. raise Exception("未找到滑块验证码元素")
  233. logger.info("验证码页面加载成功")
  234. return True
  235. except Exception as e:
  236. logger.error(f"验证码元素检查失败: {str(e)}")
  237. raise
  238. except Exception as e:
  239. logger.error(f"第 {attempt + 1} 次尝试失败: {str(e)}")
  240. if attempt < max_attempts - 1:
  241. logger.info("等待后重试...")
  242. # 重置浏览器状态
  243. try:
  244. self.driver.execute_script("""
  245. window.stop();
  246. window.location.href = 'about:blank';
  247. """)
  248. except:
  249. pass
  250. time.sleep(5)
  251. else:
  252. raise Exception("页面访问在多次尝试后仍然失败")
  253. except Exception as e:
  254. logger.error(f"页面访问失败: {str(e)}")
  255. return False
  256. finally:
  257. # 重置页面加载超时为默认值
  258. try:
  259. self.driver.set_page_load_timeout(30)
  260. except:
  261. pass
  262. def setup_browser_config(self):
  263. """配置浏览器网络设置"""
  264. try:
  265. # 配置网络设置
  266. self.driver.execute_script("""
  267. navigator.connection = {
  268. effectiveType: '4g',
  269. rtt: 50,
  270. downlink: 10,
  271. saveData: false
  272. };
  273. """)
  274. # 设置自定义请求头
  275. self.driver.execute_cdp_cmd('Network.setUserAgentOverride', {
  276. "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
  277. })
  278. # 启用网络监控
  279. self.driver.execute_cdp_cmd('Network.enable', {})
  280. # 设置网络条件
  281. self.driver.execute_cdp_cmd('Network.emulateNetworkConditions', {
  282. 'offline': False,
  283. 'latency': 20, # 延迟时间(毫秒)
  284. 'downloadThroughput': 780 * 1024 / 8, # 下载速度(字节/秒)
  285. 'uploadThroughput': 330 * 1024 / 8, # 上传速度(字节/秒)
  286. 'connectionType': 'wifi'
  287. })
  288. return True
  289. except Exception as e:
  290. logger.error(f"配置浏览器网络设置失败: {str(e)}")
  291. return False
  292. def get_slider(self):
  293. """获取滑块元素"""
  294. try:
  295. logger.info("寻找滑块元素")
  296. # 尝试多种定位方式
  297. selectors = [
  298. (By.CLASS_NAME, "gt_slider_knob"),
  299. (By.CLASS_NAME, "geetest_slider_button"),
  300. (By.CLASS_NAME, "gt_slider_knob_new"),
  301. (By.CSS_SELECTOR, ".gt_slider_knob"),
  302. (By.CSS_SELECTOR, ".geetest_slider_button"),
  303. (By.XPATH, "//div[contains(@class, 'slider')]//div[contains(@class, 'knob')]")
  304. ]
  305. # 等待任意一个元素出现
  306. for selector in selectors:
  307. try:
  308. logger.info(f"尝试使用选择器: {selector}")
  309. element = WebDriverWait(self.driver, 10).until(
  310. EC.presence_of_element_located(selector)
  311. )
  312. if element:
  313. logger.info(f"成功找到滑块元素: {selector}")
  314. return element
  315. except:
  316. continue
  317. raise Exception("未能找到滑块元素")
  318. except Exception as e:
  319. logger.error(f"获取滑块元素失败: {str(e)}")
  320. return None
  321. def get_slider_background(self):
  322. """获取背景图片"""
  323. try:
  324. # 等待背景图片加载
  325. background = WebDriverWait(self.driver, 10).until(
  326. EC.presence_of_element_located((By.CLASS_NAME, "gt_box"))
  327. )
  328. # 获取背景图片的base64数据
  329. canvas = self.driver.execute_script(
  330. "return document.getElementsByClassName('gt_box')[0].toDataURL('image/png')"
  331. )
  332. # 转换base64为图片
  333. canvas = canvas.split(',')[1]
  334. image_data = base64.b64decode(canvas)
  335. image = Image.open(io.BytesIO(image_data))
  336. return image
  337. except Exception as e:
  338. logger.error(f"获取背景图片失败: {str(e)}")
  339. return None
  340. def get_slider_image(self):
  341. """获取滑块图片"""
  342. try:
  343. # 等待滑块图片加载
  344. slider = WebDriverWait(self.driver, 10).until(
  345. EC.presence_of_element_located((By.CLASS_NAME, "gt_slice"))
  346. )
  347. # 获取滑块图片的base64数据
  348. canvas = self.driver.execute_script(
  349. "return document.getElementsByClassName('gt_slice')[0].toDataURL('image/png')"
  350. )
  351. # 转换base64为图片
  352. canvas = canvas.split(',')[1]
  353. image_data = base64.b64decode(canvas)
  354. image = Image.open(io.BytesIO(image_data))
  355. return image
  356. except Exception as e:
  357. logger.error(f"获取滑块图片失败: {str(e)}")
  358. return None
  359. def get_gap(self, bg_image, slider_image):
  360. """计算滑块缺口位置"""
  361. try:
  362. # 转换图片格式
  363. bg = cv2.cvtColor(np.array(bg_image), cv2.COLOR_RGB2BGR)
  364. slider = cv2.cvtColor(np.array(slider_image), cv2.COLOR_RGB2BGR)
  365. # 计算差异
  366. diff = cv2.absdiff(bg, slider)
  367. mask = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
  368. ret, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
  369. # 查找轮廓
  370. contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  371. if contours:
  372. # 获取最大轮廓
  373. max_contour = max(contours, key=cv2.contourArea)
  374. x, y, w, h = cv2.boundingRect(max_contour)
  375. return x
  376. return None
  377. except Exception as e:
  378. logger.error(f"计算缺口位置失败: {str(e)}")
  379. return None
  380. def generate_track(self, distance):
  381. """生成移动轨迹"""
  382. tracks = []
  383. current = 0
  384. mid = distance * 3 / 4
  385. t = 0.2
  386. v = 0
  387. while current < distance:
  388. if current < mid:
  389. a = 2
  390. else:
  391. a = -3
  392. v0 = v
  393. v = v0 + a * t
  394. move = v0 * t + 1 / 2 * a * t * t
  395. current += move
  396. tracks.append(round(move))
  397. # 微调
  398. while sum(tracks) > distance:
  399. tracks[-1] -= 1
  400. while sum(tracks) < distance:
  401. tracks.append(1)
  402. # 添加回退
  403. tracks.extend([-1, -1, -2, -2, -1, -1])
  404. return tracks
  405. def move_slider(self, slider, tracks):
  406. """移动滑块"""
  407. try:
  408. ActionChains(self.driver).click_and_hold(slider).perform()
  409. for track in tracks:
  410. ActionChains(self.driver).move_by_offset(track, random.randint(-1, 1)).perform()
  411. time.sleep(random.uniform(0.01, 0.02))
  412. time.sleep(0.5)
  413. ActionChains(self.driver).release().perform()
  414. return True
  415. except Exception as e:
  416. logger.error(f"移动滑块失败: {str(e)}")
  417. return False
  418. def crack_captcha(self):
  419. """破解验证码主流程"""
  420. try:
  421. logger.info("开始破解验证码")
  422. # 获取滑块元素
  423. slider = self.get_slider()
  424. if not slider:
  425. return False
  426. # 获取图片
  427. bg_image = self.get_slider_background()
  428. slider_image = self.get_slider_image()
  429. if not bg_image or not slider_image:
  430. return False
  431. # 计算缺口位置
  432. gap = self.get_gap(bg_image, slider_image)
  433. if not gap:
  434. return False
  435. logger.info(f"缺口位置: {gap}")
  436. # 生成轨迹
  437. tracks = self.generate_track(gap)
  438. logger.info(f"生成轨迹: {len(tracks)}个点")
  439. # 移动滑块
  440. result = self.move_slider(slider, tracks)
  441. if not result:
  442. return False
  443. # 等待验证结果
  444. time.sleep(2)
  445. # 检查是否验证成功
  446. try:
  447. success = WebDriverWait(self.driver, 5).until(
  448. EC.presence_of_element_located((By.CLASS_NAME, "gt_success"))
  449. )
  450. if success:
  451. logger.info("验证成功")
  452. return True
  453. except:
  454. logger.error("验证失败")
  455. return False
  456. except Exception as e:
  457. logger.error(f"验证码破解失败: {str(e)}")
  458. return False
  459. def cleanup_driver(self):
  460. """清理 WebDriver 资源"""
  461. if hasattr(self, 'driver') and self.driver:
  462. try:
  463. self.driver.quit()
  464. except:
  465. pass
  466. finally:
  467. self.driver = None
  468. def run(self):
  469. """运行主程序"""
  470. try:
  471. logger.info("开始破解验证码流程")
  472. # 初始化浏览器
  473. if not self.setup_driver():
  474. return False
  475. # 访问目标页面
  476. if not self.navigate_to_page():
  477. return False
  478. # 破解验证码
  479. max_attempts = 3
  480. for attempt in range(max_attempts):
  481. logger.info(f"第 {attempt + 1} 次尝试破解验证码")
  482. if self.crack_captcha():
  483. return True
  484. time.sleep(2)
  485. return False
  486. except Exception as e:
  487. logger.error(f"程序执行失败: {str(e)}")
  488. return False
  489. finally:
  490. logger.info("程序执行结束")
  491. self.cleanup_driver()
  492. def __del__(self):
  493. """析构函数,确保资源被清理"""
  494. self.cleanup_driver()
  495. self.cleanup_processes()
  496. if __name__ == "__main__":
  497. cracker = GeetestCracker()
  498. result = cracker.run()
  499. print("破解结果:", "成功" if result else "失败")