파이썬

http.client 모듈 예제 download-image from google

mcdn 2020. 9. 10. 14:50
반응형
import os
from http.client import HTTPConnection
from urllib.parse import urljoin, urlunparse
from urllib.request import urlretrieve
from html.parser import HTMLParser

class ImageParser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    The collected values are stored, in document order, in ``self.result``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Create the list up front so ``result`` exists (as an empty list)
        # even when the document contains no <img> tag at all.
        self.result = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            # A bare ``src`` (value None) or an empty one carries no URL.
            if name == 'src' and value:
                self.result.append(value)

def download_image(url, data):
    """Download every image referenced by an ``<img src=...>`` tag in a page.

    Files are saved into a ``DOWNLOAD`` directory (relative to the current
    working directory), which is created when missing. Duplicate ``src``
    values are downloaded once, in sorted order.

    Args:
        url: Base URL of the page; relative ``src`` values are resolved
            against it.
        data: HTML text of the page.
    """
    # Function-scope import: urlsplit is not among the file's top-level imports.
    from urllib.parse import urlsplit

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('DOWNLOAD', exist_ok=True)

    parser = ImageParser()
    parser.feed(data)
    # Tolerate a parser that never set ``result`` (no <img> tag was seen),
    # and drop empty/None sources, which have no URL to fetch.
    sources = {src for src in getattr(parser, 'result', []) if src}

    for src in sorted(sources):
        imageUrl = urljoin(url, src)
        # Name the file after the URL path only, so a query string or
        # fragment does not leak into the file name.
        basename = os.path.basename(urlsplit(imageUrl).path)
        if not basename:
            # The URL path ends in '/', so there is no file name to save under.
            continue
        targetFile = os.path.join('DOWNLOAD', basename)

        print("Downloading ... ", imageUrl)
        urlretrieve(imageUrl, targetFile)

def main():
    """Fetch the front page of ``www.google.co.kr`` and download its images."""
    host = 'www.google.co.kr'

    conn = HTTPConnection(host)
    try:
        conn.request('GET', '/')
        resp = conn.getresponse()

        # get_param returns None when the Content-Type header declares no
        # charset; fall back to UTF-8 instead of passing None to decode().
        charset = resp.msg.get_param('charset') or 'utf-8'
        # The fallback charset is a guess, so replace undecodable bytes
        # rather than aborting the whole run.
        data = resp.read().decode(charset, errors='replace')
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    print("\n>>>> Download Images from", host)
    url = urlunparse(('http', host, '', '', '', ''))
    download_image(url, data)


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
(venv) C:\Users\user\PycharmProjects\Pytwebpractice\mysite>python download_image.py

>>>> Download Images from www.google.co.kr
Downloading ...  http://www.google.co.kr/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png
Downloading ...  http://www.google.co.kr/textinputassistant/tia.png

(venv) C:\Users\user\PycharmProjects\Pytwebpractice\mysite>

 

mysite 하위에 DOWNLOAD 폴더가 만들어지고, 그 안에 png 형식의 파일 두 개가 저장되었다!!!

 

http:// 로 시작하는 주소라고 해서 모두 열리는 것은 아님

import os
from http.client import HTTPConnection
from urllib.parse import urljoin, urlunparse
from urllib.request import urlretrieve
from html.parser import HTMLParser

class ImageParser(HTMLParser):
    """Gather the ``src`` attribute of each ``<img>`` tag in the parsed HTML.

    Values are appended to ``self.result`` in the order the tags appear.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialise eagerly: without this, a document that has no <img>
        # tag would leave ``result`` undefined.
        self.result = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            # Skip a bare ``src`` (value None) or an empty one: no URL there.
            if name == 'src' and value:
                self.result.append(value)

def download_image(url, data):
    """Download every image referenced by an ``<img src=...>`` tag in a page.

    Files are saved into a ``DOWNLOAD`` directory (relative to the current
    working directory), which is created when missing. Duplicate ``src``
    values are downloaded once, in sorted order.

    Args:
        url: Base URL of the page; relative ``src`` values are resolved
            against it.
        data: HTML text of the page.
    """
    # Function-scope import: urlsplit is not among the file's top-level imports.
    from urllib.parse import urlsplit

    # The script creates the output folder itself; exist_ok avoids the
    # check-then-create race of a separate exists() test.
    os.makedirs('DOWNLOAD', exist_ok=True)

    # Original author's note: the rest is similar to the earlier
    # parse_image.py example.
    parser = ImageParser()
    parser.feed(data)
    # Tolerate a parser that never set ``result`` (no <img> tag was seen),
    # and drop empty/None sources, which have no URL to fetch.
    sources = {src for src in getattr(parser, 'result', []) if src}

    for src in sorted(sources):
        # Base URL (built with urlunparse from the host) joined with the
        # image's src, e.g.
        #   url -> http://www.google.co.kr
        #   src -> /textinputassistant/tia.png
        imageUrl = urljoin(url, src)
        # Name the file after the URL path only, so a query string or
        # fragment does not leak into the file name, e.g. tia.png
        basename = os.path.basename(urlsplit(imageUrl).path)
        if not basename:
            # The URL path ends in '/', so there is no file name to save under.
            continue

        # e.g. DOWNLOAD\tia.png on Windows
        targetFile = os.path.join('DOWNLOAD', basename)
        print("Downloading ... ", imageUrl)
        # Fetch the image from its URL and write it to targetFile.
        urlretrieve(imageUrl, targetFile)

def main():
    """Fetch the front page of ``www.google.co.kr`` and download its images."""
    host = 'www.google.co.kr'

    conn = HTTPConnection(host)
    try:
        conn.request('GET', '/')
        resp = conn.getresponse()

        # get_param returns None when the Content-Type header declares no
        # charset; fall back to UTF-8 instead of passing None to decode().
        charset = resp.msg.get_param('charset') or 'utf-8'
        # The fallback charset is a guess, so replace undecodable bytes
        # rather than aborting the whole run.
        data = resp.read().decode(charset, errors='replace')
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    print("\n>>>> Download Images from", host)
    # urlunparse assembles a URL from its six components.
    url = urlunparse(('http', host, '', '', '', ''))
    download_image(url, data)


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
반응형