|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
3 S6 a% B2 i ?2 ~8 u! k1 S
5 N( S: m' V8 {1 q6 N
, P5 ?& _( g6 T
# -*- coding: utf-8 -*-
# Script setup: imports, Baidu OCR client and Chrome options for a
# Selenium bot that logs in to youlikehits.com.

import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Your Baidu AIP credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'  # the opening quote was missing in the original paste

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server flag below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win -- presumably Windows-only flags to quiet Chrome's console logging;
# left disabled as in the original.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')

# Reading the image file.
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes.

    The file is opened in binary mode and closed again before returning.
    Any error raised by ``open`` or ``read`` propagates to the caller.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome, log in to youlikehits.com and open the YouTube task page.
import os  # imported here so this section stays self-contained

# '~' is only expanded by a shell; expand it explicitly so the chromedriver
# path resolves to the user's home directory.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed wait before touching the login form

driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the element with id "loadmore" if the page has one.

    The original author labelled this step "Try Again". After a successful
    click the window is resized back to 1280x727.

    :param driver: Selenium WebDriver whose current page is inspected.
    :returns: None. Any failure is ignored on purpose.
    """
    try:
        refresh_button = driver.find_element_by_id('loadmore')
        refresh_button.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort: a missing or unclickable element just means there is
        # nothing to refresh.
        pass
def _solve_math_captcha(text):
    """Evaluate the arithmetic expression that OCR read from the captcha."""
    # Map the multiplication/division glyphs OCR may return onto Python
    # operators.
    expr = re.sub(r'\xd7', r'*', text)
    expr = re.sub(r'\xf7', r'/', expr)
    expr = re.sub(r'x', r'*', expr)
    expr = re.sub(r'X', r'*', expr)
    # SECURITY: the text comes from a remote page via OCR and is passed to
    # eval(). Refuse anything that is not plain arithmetic before evaluating.
    if not re.match(r'^[0-9+\-*/(). ]+$', expr):
        raise ValueError('unexpected captcha text: %r' % (expr,))
    return eval(expr)


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page, if any.

    The page is screenshotted, the captcha image is cropped out, sent to the
    module-level OCR ``client`` and the recognised expression is evaluated
    and typed into the ``answer`` field.

    :param driver: Selenium WebDriver showing the page to inspect.
    :returns: 1 if a captcha was found and an answer submitted; 0 if there
        is no element with id "captcha" or any step failed.
    """
    try:
        # Raises when no captcha is present, which the handler below turns
        # into a 0 return.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Headless mode: resize to (1269, 727). Per the original author's
        # comments, use (1269, 610) when not running headless.
        # Presumably this aligns screenshot pixels with element coordinates.
        page_image = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a constant string and
        # so always true; it may have been meant as [@id='captcha']. Kept
        # unchanged to preserve which element is matched.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        page_image.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        yzm = _solve_math_captcha(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        # Send text, not a number, so a non-integer result is typed too.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # Callers only distinguish solved (1) from not solved (0).
        return 0
def followbutton(driver):
    """Click the page's follow button and report the points shown before it.

    Switches to the first window, reads the text of the "currentpoints"
    element, clicks the ``a.followbutton`` link and switches to the second
    window. If the whitespace-stripped source of that window contains "This"
    within its first 100 characters, the source is printed and the first
    window is reloaded.

    :param driver: Selenium WebDriver with the task page in its first window.
    :returns: the "currentpoints" text read before clicking, or 0 if any
        step raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text

        follow_link = driver.execute_script(
            'return document.querySelector("a[class=followbutton]")')
        follow_link.click()

        driver.switch_to_window(driver.window_handles[1])
        video_source = ''.join(driver.page_source.split())
        marker_pos = video_source.find("This")
        # NOTE(review): indentation was lost in the paste; the whole refresh
        # sequence is assumed to belong to this branch -- confirm.
        if 0 <= marker_pos < 100:
            print(video_source)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Any failure (missing element, no second window, ...) yields 0.
        return 0
# Main loop: up to 5000 rounds of solve captcha, refresh the list, follow a
# video, wait, then compare the points counter.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)  # fixed wait before re-reading the counter

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Counter unchanged since before the click: reload the task page.
            # NOTE(review): indentation was lost in the paste; the sleep is
            # assumed to belong to this branch -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the message survives if the reload fails too.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    # Always shut the browser down, even if recovery above raised.
    driver.quit()
复制代码 2 g& R: i8 s0 ~# N2 ~
- b8 }) Z9 _7 c8 S
因为是 headless 模式，可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
/ `- ]) v! O8 w3 B
$ E: C3 w) }9 q2 |/ x" t3 O7 P |
评分
-
查看全部评分
|