|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
8 v$ Y/ u3 u: { p3 |0 n( v/ P3 A5 b
+ c J; n& y+ L$ |; G4 g q
- - X, X0 Y& `" [( {. O" L Q
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Your OCR service credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client; checkcaptcha() calls client.basicGeneral() on the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy for Chrome. To use it, uncomment this line together with
# the '--proxy-server' argument below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, no GPU, incognito, ignore TLS errors.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win (extra flags the author left disabled)
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
1 [) A$ k5 T. R( [ - * _7 \, M- m* a6 }+ v& ?4 y
- """ 读取图片 """- Z( M, y) T1 h" m$ J7 n
def get_file_content(filePath):
    """Read a file (here: an image) and return its raw bytes.

    :param filePath: path of the file to read.
    :return: the complete file content, read in binary mode.
    :raises IOError/OSError: if the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
- * l- x5 k( E4 {, J. d
# Start Chrome with the options configured above. '~' is only expanded by a
# shell, so expand it explicitly before handing the path to Selenium.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
# NOTE(review): checkcaptcha() resizes its screenshot to 1269x727, so this
# window size presumably has to stay in sync with it -- confirm.
driver.set_window_size(1280, 727)

# Log in through the form on the landing page.
driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed pause before touching the form (presumably page load)
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
: K4 z3 g% S7 @4 [2 u; l4 I' g- #Try Again
5 u3 ]0 o: o8 B, Z - - k# F! b* R: s& ]" B5 d- ?2 W
def checkRefresh(driver):
    """Click the element with id 'loadmore' if present, then reset the window size.

    Best effort: if the element cannot be found or clicked, nothing happens
    and no error is reported.

    :param driver: the Selenium WebDriver controlling the browser.
    :return: None.
    """
    try:
        refresh = driver.find_element_by_id('loadmore')
        refresh.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately ignored: the lookup raises whenever the page has no
        # 'loadmore' element, and that is not treated as an error here.
        pass
6 m, @# c% y/ b0 Y. k. L, W% E - # M0 L# q, u) Q
def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    Takes a screenshot, crops it to the captcha image, sends the crop to the
    OCR client, turns the recognised text into an arithmetic expression,
    evaluates it and submits the result through the page's answer form.

    :param driver: the Selenium WebDriver controlling the browser.
    :return: 1 if a captcha was found and an answer was submitted, 0 if the
        page has no captcha element or solving it failed.
    """
    try:
        # Presence check only: the lookup raises when there is no captcha.
        driver.find_element_by_id('captcha')
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # The author's note for running with headless mode switched off:
        #   screenshot.resize((1269, 610), Image.ANTIALIAS)
        # Size used with headless mode switched on:
        resized = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a plain string literal,
        # so it looks like it does not restrict the div at all -- confirm
        # whether [@id='captcha'] was intended.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        # Map the multiplication/division signs (and the letters x/X that
        # stand in for the multiplication sign) onto Python operators.
        yzm = re.sub(r'\xd7', r'*', ocr_result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)
        # SECURITY: eval() runs text recognised from a remote page. Kept to
        # preserve behaviour; a restricted arithmetic parser would be safer.
        yzm = eval(yzm)
        print('验证码: ' + str(yzm))

        # Type the answer as text so a non-integer result is handled too.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # A captcha was present but could not be solved: report it instead of
        # failing silently, and keep the "0 means not solved" contract.
        print('error: ' + str(e))
        return 0
def followbutton(driver):
    """Click the page's follow button and inspect the window it opens.

    Reads the 'currentpoints' text in the first window, clicks the
    'a[class=followbutton]' link, switches to the second window and checks
    whether "This" occurs within the first 100 characters of its
    whitespace-stripped source. If so, the source is printed and the first
    window is reloaded.

    :param driver: the Selenium WebDriver controlling the browser.
    :return: the 'currentpoints' text read before clicking, or 0 if anything
        raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text

        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to_window(driver.window_handles[1])

        video_source = ''.join(driver.page_source.split())
        marker_pos = video_source.find("This")
        # NOTE(review): the pasted source lost its indentation; the extent of
        # this `if` body is reconstructed -- confirm against the original.
        if 0 <= marker_pos < 100:
            print(video_source)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Any failure is reported to the caller as 0.
        return 0
5 Z2 |$ L- B( V - 0 [$ ?8 H7 n: \7 A& c
# Main loop: up to 5000 rounds of "solve captcha, load more, follow, wait".
# The browser is shut down in `finally` so it is released even when the
# error handler itself raises.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Points unchanged since before the follow click: reload.
            # NOTE(review): the pasted source lost its indentation; the
            # extent of this `if` body is reconstructed -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Recover by reloading the page, then report what went wrong.
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    driver.quit()
复制代码
) @7 n- D2 H1 O+ N7 m5 M& S q! K$ x, B t
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
3 T3 w$ j* \. Y% G |
评分
-
查看全部评分
|