http://blog.naver.com/aaaa875/110102871580
소스는 정말 지저분해도 할일은 합니다 ㅋㅋ
무게타로 접속하기 위해서 헤더 쫌 변조해주고(아이폰으로)
다음 파일을 받은 뒤 파싱으로 본문을 뜬 후
다시 파싱으로 다음페이지를 떠서 이동
금요일 파이썬 책 대여
토요일 이 프로그램 완성
ㅋㅋㅋㅋ
# -*- coding: utf-8 -*-
import httplib
import re
import string
n = 1
next_page = raw_input('¼Ò¼³ ½ÃÀÛ ÆäÀÌÁö /?PAGEKEY= µÚÀÇ ºÎºÐ ÁÖ¼Ò¸¦ Àû¾îÁÖ¼¼¿ä')
text_page = raw_input('¸îÆäÀÌÁöÀԴϱî? ')
text_name = raw_input('ÀúÀåÇÒ À̸§Àº? .txtÆ÷ÇÔÇØ¼ ')
site_cookie = raw_input('Äí۰ªÁß user_no°ªÀ» ³Ö¾îÁÖ¼¼¿ä')
text_save = file(text_name,'w')
text_save.close()
n = 1
while int(n)<=int(text_page):
print n
print ' page\n'
host = 'wr.mugeta.com'
h = httplib.HTTP(host)
h.putrequest('GET','/?PAGEKEY='+next_page)
h.putheader('Host', host)
h.putheader('User-Agent', 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543 Safari/419.3')
h.putheader('Accept','text/html')
h.putheader('Cookie',site_cookie)
h.endheaders()
errorCode, errorMessage, headers = h.getreply()
a = h.getfile()
text_tmp = a.read()
h.close()
print text_tmp
#//Á¤±Ô½Ä_º»¹®_1Â÷
tmp_a = ''.join(re.findall('"90%" align="center" ?(.*?)</td>',text_tmp,re.DOTALL))
print '================='
print tmp_a
print '================='
#//Á¤±Ô½Ä_º»¹®_2Â÷ ¾µµ¥¾ø´Â°Í Á¦°Å&º¯È¯
text_main=str(re.sub(''','\'',tmp_a,1000,re.DOTALL))
text_main=str(re.sub('<br>','\n',text_main,1000,re.DOTALL))
text_main=str(re.sub('"','"',text_main,1000,re.DOTALL))
text_main=str(re.sub('','',text_main,1000,re.DOTALL))
text_main=str(re.sub(' ','',text_main,1000,re.DOTALL))
text_main=str(re.sub('<tr>','',text_main,1000,re.DOTALL))
text_main=str(re.sub('<td>','',text_main,1000,re.DOTALL))
text_main=str(re.sub('<tr>','',text_main,1000,re.DOTALL))
print text_main
print '================='
#//ÅØ½ºÆ®¿¡ ÀúÀå
text_save = file(text_name,'a+')
text_save.write(text_main)
text_save.close()
#//´ÙÀ½ÆäÀÌÁö ÁÖ¼Ò ÃßÃâ
m = re.findall('<a href=[\'](.*?)[\']',text_tmp,re.DOTALL)
print m
next_page = m[1]
next_page = ''.join(next_page)
print next_page
#next_page.replace('<a accesskey="9" href="http://wr.mugeta.com','')
n = n+1
print 'complete'
|