파이썬
http.client 모듈 예제 download-image from google
mcdn
2020. 9. 10. 14:50
반응형
import os
from http.client import HTTPConnection
from urllib.parse import urljoin, urlunparse
from urllib.request import urlretrieve
from html.parser import HTMLParser
class ImageParser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    After ``feed()`` the collected sources are available in ``self.result``,
    in document order and including duplicates.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Created eagerly so ``result`` exists (as an empty list) even when
        # the document contains no <img> tag at all.
        self.result = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            # A bare ``src`` attribute arrives with value None and ``src=""``
            # arrives as an empty string; neither names an image.
            if name == 'src' and value:
                self.result.append(value)
def download_image(url, data):
    """Download every image referenced by an ``<img src=...>`` tag in a page.

    Files are written into a ``DOWNLOAD`` directory below the current working
    directory; the directory is created when it does not exist yet.

    Args:
        url: Base URL of the page; relative ``src`` values are resolved
            against it.
        data: HTML text of the page.
    """
    # Local import: the module-level import list does not provide urlsplit.
    from urllib.parse import urlsplit

    # exist_ok avoids the race between a separate exists() check and makedirs().
    os.makedirs('DOWNLOAD', exist_ok=True)

    parser = ImageParser()
    parser.feed(data)
    # Tolerate a parser that never created ``result`` (page without <img>)
    # and drop empty/None sources; the set removes duplicates.
    sources = {src for src in getattr(parser, 'result', ()) if src}

    for src in sorted(sources):
        image_url = urljoin(url, src)
        # Name the file after the URL path only, so a query string or
        # fragment ("logo.png?v=2") does not leak into the file name.
        basename = os.path.basename(urlsplit(image_url).path)
        if not basename:
            # Without a file name the target would be the directory itself.
            print("Skipping (no file name) ... ", image_url)
            continue
        target_file = os.path.join('DOWNLOAD', basename)
        print("Downloading ... ", image_url)
        urlretrieve(image_url, target_file)
def main():
    """Fetch the front page of www.google.co.kr and download its images."""
    host = 'www.google.co.kr'
    conn = HTTPConnection(host)
    try:
        # '/' is what http.client sends for an empty path; spell it out.
        conn.request('GET', '/')
        resp = conn.getresponse()
        # Content-Type may carry no charset parameter; decode(None) would
        # raise TypeError, so fall back to UTF-8.
        charset = resp.msg.get_param('charset') or 'utf-8'
        # Only the <img> tags matter, so a stray undecodable byte should not
        # abort the whole run.
        data = resp.read().decode(charset, errors='replace')
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    print("\n>>>> Download Images from", host)
    # Assemble the base URL from its six components (scheme, netloc, path,
    # params, query, fragment).
    url = urlunparse(('http', host, '', '', '', ''))
    download_image(url, data)
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
(venv) C:\Users\user\PycharmProjects\Pytwebpractice\mysite>python download_image.py
>>>> Download Images from www.google.co.kr
Downloading ... http://www.google.co.kr/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png
Downloading ... http://www.google.co.kr/textinputassistant/tia.png
(venv) C:\Users\user\PycharmProjects\Pytwebpractice\mysite>
mysite 하위에 DOWNLOAD 폴더가 만들어지고, 그 안에 PNG 형식의
파일 두 개가 저장되었다!
주소가 http://로 시작한다고 해서 모두 열리는 것은 아님
import os
from http.client import HTTPConnection
from urllib.parse import urljoin, urlunparse
from urllib.request import urlretrieve
from html.parser import HTMLParser
class ImageParser(HTMLParser):
    """Gather the ``src`` values of all ``<img>`` tags seen while parsing.

    The values are stored in ``self.result`` in document order; duplicates
    are kept.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialised here rather than lazily, so callers can read ``result``
        # even if no <img> tag was ever encountered.
        self.result = []

    def handle_starttag(self, tag, attrs):
        """Append the ``src`` of an ``<img>`` start tag to ``self.result``.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            # Skip ``src`` without a value (None) and ``src=""``: there is
            # no image to fetch for them.
            if name == 'src' and value:
                self.result.append(value)
def download_image(url, data):
    """Download every image referenced by an ``<img src=...>`` tag in a page.

    Files are saved into a ``DOWNLOAD`` directory below the current working
    directory.

    Args:
        url: Base URL of the page; relative ``src`` values are resolved
            against it.
        data: HTML text of the page.
    """
    # Local import: the module-level import list does not provide urlsplit.
    from urllib.parse import urlsplit

    # Create the output folder automatically; exist_ok avoids the race
    # between a separate exists() check and makedirs().
    os.makedirs('DOWNLOAD', exist_ok=True)

    # Apart from the folder handling, this mirrors the earlier
    # parse_image.py example.
    parser = ImageParser()
    parser.feed(data)
    # Tolerate a parser that never created ``result`` (page without <img>)
    # and drop empty/None sources; the set removes duplicates.
    sources = {src for src in getattr(parser, 'result', ()) if src}

    for src in sorted(sources):
        # Base URL + src, e.g.
        #   url = http://www.google.co.kr
        #   src = /textinputassistant/tia.png
        image_url = urljoin(url, src)
        # Use only the URL path for the file name (e.g. tia.png), so a query
        # string or fragment does not leak into it.
        basename = os.path.basename(urlsplit(image_url).path)
        if not basename:
            # Without a file name the target would be the directory itself.
            print("Skipping (no file name) ... ", image_url)
            continue
        # e.g. DOWNLOAD\tia.png on Windows
        target_file = os.path.join('DOWNLOAD', basename)
        print("Downloading ... ", image_url)
        # Fetch the resource at image_url and write it to target_file.
        urlretrieve(image_url, target_file)
def main():
    """Fetch the front page of www.google.co.kr and download its images."""
    host = 'www.google.co.kr'
    conn = HTTPConnection(host)
    try:
        # '/' is what http.client sends for an empty path; spell it out.
        conn.request('GET', '/')
        resp = conn.getresponse()
        # Content-Type may carry no charset parameter; decode(None) would
        # raise TypeError, so fall back to UTF-8.
        charset = resp.msg.get_param('charset') or 'utf-8'
        # Only the <img> tags matter, so a stray undecodable byte should not
        # abort the whole run.
        data = resp.read().decode(charset, errors='replace')
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    print("\n>>>> Download Images from", host)
    # urlunparse takes the six URL components and assembles them into a URL.
    url = urlunparse(('http', host, '', '', '', ''))
    download_image(url, data)
# Entry point: call main() only when run directly as a script.
if __name__ == '__main__':
    main()
반응형