[개발] 구글 이미지 크롤링 + 이미지 자동 편집

오늘은 제가 사용했던 구글 이미지 크롤링 프로그램 소스코드를 보여드리려고 합니다.
아래 코드의 주석을 함께 보시면 조금 더 이해하기 쉬울 것입니다.

* 참고로 저는 비개발자입니다. 구글에서 찾은 코드를 짜깁기/응용해서 만든 거라 품질이 낮을 수 있습니다. 다만 제가 원하던 대로 동작은 하고 있습니다.
본 프로그램은 파이썬으로 제작되었으며 특정 검색어에 대한 이미지를 구글에서 검색해 다운받습니다.
연예인 사진을 수집한다던가, 아래 코드를 커스텀하여 사용처를 늘릴 수도 있겠네요.

1. 개발 코드

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import chromedriver_autoinstaller
import subprocess
import shutil
import sys
import datetime
import os
import sys
import time
import urllib.request
import tkinter
from tkinter import *
from PIL import Image

def image_cut(path_new, path):
    """Crop the image at ``path`` to a centered square and save it to ``path_new``.

    The square's side is the shorter of the image's width and height, and the
    crop window is centered along the longer axis. Images that are already
    square are saved to ``path_new`` unchanged so that the output folder
    contains every processed image.

    Args:
        path_new: Destination file path for the square image.
        path: Path of the source image to read.

    Raises:
        OSError: If ``path`` cannot be opened as an image or ``path_new``
            cannot be written (propagated from Pillow).
    """
    # The context manager closes the underlying file even if saving fails.
    with Image.open(path) as img:
        w, h = img.size
        side = min(w, h)
        # Integer offsets guarantee an exactly square box. Float offsets are
        # rounded independently by Pillow and can yield an off-by-one,
        # non-square result when (h - w) is odd.
        left = (w - side) // 2
        upper = (h - side) // 2
        if w < h:
            print('image_cut_6')
        elif w > h:
            print('image_cut_7')
        img_cuted = img.crop((left, upper, left + side, upper + side))
        img_cuted.save(path_new)


def crawling_img():
    """Search Google Images for the GUI keyword and download the results.

    Reads the keyword and the requested count from the module-level tkinter
    variables ``input_keyword`` and ``input_cnt``, opens Chrome through
    Selenium, clicks each result thumbnail in turn, downloads the image shown
    in the preview panel to ``<cwd>/img/<keyword>/<keyword><n>.jpg`` and
    writes a center-cropped square copy to the ``edit`` sub-folder via
    ``image_cut``.

    A count of 0 means "no limit" and is capped at 3000. The count bounds the
    number of thumbnails attempted; thumbnails that fail to download are
    skipped but still counted, so the file numbering follows the thumbnail
    index.
    """
    # Read the GUI inputs before launching the browser so that invalid input
    # cannot leave an orphaned Chrome window behind.
    keyword = input_keyword.get()
    job_cnt = int(input_cnt.get())
    if job_cnt == 0:
        job_cnt = 3000

    # Configure and start the Chrome driver. install(True) downloads a
    # matching chromedriver into the current working directory if it is
    # missing and adds its folder to PATH, so no explicit driver path is
    # needed (the positional path argument was removed in Selenium 4).
    option = Options()
    chromedriver_autoinstaller.install(True)
    driver = webdriver.Chrome(options=option)
    try:
        driver.implicitly_wait(10)

        driver.get("https://www.google.co.kr/imghp?hl=ko&tab=wi&authuser=0&ogbl")
        elem = driver.find_element(By.NAME, "q")
        elem.send_keys(keyword)
        elem.send_keys(Keys.RETURN)

        img_dir = os.path.join(os.getcwd(), 'img', keyword)
        edit_dir = os.path.join(img_dir, 'edit')
        try:
            os.makedirs(edit_dir, exist_ok=True)
        except OSError:
            # Nothing can be saved without the output folders.
            print("CAN'T CREATE DIR")
            return

        # NOTE: the page is not scrolled to load further results, so only the
        # thumbnails already present in the page are visited.
        thumb_selector = ".rg_i.Q4LuWd"
        imgs = driver.find_elements(By.CSS_SELECTOR, thumb_selector)
        count = 0

        while count < job_cnt:
            if count >= len(imgs):
                # Re-query once in case more thumbnails have appeared; stop
                # when none are left instead of looping forever.
                imgs = driver.find_elements(By.CSS_SELECTOR, thumb_selector)
                if count >= len(imgs):
                    print('이미지없음')
                    break
            try:
                imgs[count].click()
                time.sleep(1)
                imgUrl = driver.find_element(By.XPATH, '/html/body/div[2]/c-wiz/div[3]/div[2]/div[3]/div/div/div[3]/div[2]/c-wiz/div/div[1]/div[1]/div[3]/div/a/img').get_attribute("src")
                print('debug111')
                path = os.path.join(img_dir, keyword + str(count) + ".jpg")
                path_new = os.path.join(edit_dir, keyword + str(count) + ".jpg")
                print(path)
                urllib.request.urlretrieve(imgUrl, path)
                time.sleep(2)
                image_cut(path_new, path)
            except Exception as exc:
                # Best effort: skip this thumbnail and refresh the element
                # list, since the stored references may have gone stale.
                print(f'skip {count}: {exc}')
                imgs = driver.find_elements(By.CSS_SELECTOR, thumb_selector)
            count += 1
    finally:
        # quit() also shuts down the chromedriver process, not only the window.
        driver.quit()






# Build the GUI input window.
root = tkinter.Tk()
root.title("이미지 수집 프로그램")
root.resizable(False, False)
root.geometry('365x90')

# Variables bound to the entry widgets; crawling_img() reads them as globals.
input_keyword = tkinter.StringVar()
input_cnt = tkinter.IntVar()

btn1 = tkinter.Button(root, text="SEARCHING", command=crawling_img)
btn1.place(x=275, y=10, height=40)

label1 = tkinter.Label(root, text="검색어")
label1.place(x=10, y=10)
entry1 = tkinter.Entry(root, textvariable=input_keyword)
entry1.place(x=60, y=10, width=200)

label2 = tkinter.Label(root, text="사진 수")
label2.place(x=10, y=35)
entry2 = tkinter.Entry(root, textvariable=input_cnt)
entry2.place(x=60, y=35, width=200)

label3 = tkinter.Label(root, text="* 사진 수 0 은 무제한 입니다.")
label3.place(x=10, y=60)

root.mainloop()
# mainloop() returns once the window has been closed. At that point the Tk
# application is normally already destroyed, so a second destroy() raises
# TclError; tolerate that instead of ending with a traceback.
try:
    root.destroy()
except tkinter.TclError:
    pass
sys.exit()

2. 모듈 다운로드

pip install pillow
pip install selenium
# tkinter는 파이썬 표준 라이브러리에 포함되어 있어 별도로 pip 설치할 필요가 없습니다.
pip install chromedriver-autoinstaller

저작자표시

'CAREER' 카테고리의 다른 글

2024 현대오토에버의 상반기 변화 (0)	2024.05.30
네트워크 서브넷(Subnet) 계산기 (0)	2024.02.08
2024년 현대오토에버의 변화 (근무환경개선, 성과급지급) (0)	2024.01.19
[PMP] 자격증 응시 방법 및 시험후기 (경력 입력, Audit) (1)	2023.12.21
2023 현대오토에버 이직 과정 및 후기 (6)	2023.07.23

[개발] 구글 이미지 크롤링 + 이미지 자동 편집

'CAREER' 카테고리의 다른 글

관련글

티스토리툴바