タグ: Python

YAHOO検索結果スクレイピング

Yahoo検索は1回のリクエストで10件ずつしか取得できず、連続してアクセスすると結果が取れなくなるようなので（？）、リクエストの間にスリープを入れて回避しています。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Fetch Google and Yahoo! JAPAN search results for a query
# and print the result titles as a numbered ranking.

import re
import time
import webbrowser as wb
import requests
from bs4 import BeautifulSoup

#-------------------------------------------------
## main ###
#-------------------------------------------------
def _fetch_soup(url):
    """Download *url* and return its HTML parsed with ``html.parser``.

    Raises whatever ``requests.get`` raises, including a timeout error
    when the server does not respond in time.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=10)
    return BeautifulSoup(response.content, "html.parser")


def _print_google_ranking():
    """Print the Google result titles as ``<index>:<title>`` lines."""
    # Ask Google for roughly the top 100 hits in a single request (num=101).
    load_url = "https://www.google.co.jp/search?hl=ja&source=hp&q=ジョジョ+スタンド&ie=utf-8&oe=utf-8&num=101"
    web_data = _fetch_soup(load_url)

    # Elements whose class attribute is exactly this string are treated as
    # result titles.
    # NOTE(review): these class names come from Google's markup and may
    # change at any time -- confirm they still match.
    title_tags = web_data.find_all(True, {'class': 'BNeawe vvjwJb AP7Wnd'})

    for cnt, tag in enumerate(title_tags):
        # get_text() returns the element's text without any markup.
        # str.strip(chars) must not be used to peel off the surrounding
        # <div ...> tag: it removes a *set of characters*, not a prefix, and
        # therefore also eats leading letters of the title itself.
        print(str(cnt) + ":" + tag.get_text())


def _print_yahoo_ranking():
    """Print the Yahoo result titles as ``<index>:<title>`` lines."""
    print("■Yahoo ランキング■")
    cnt = 0
    # Yahoo returns 10 results per page, so request 10 pages.
    for page in range(10):
        # "b" is the 1-based index of the first result on the page.
        pagenum = page * 10 + 1
        load_url = "https://search.yahoo.co.jp/search?p=ジョジョ+スタンド&ei=utf-8&b=" + str(pagenum)
        web_data = _fetch_soup(load_url)

        for anchor in web_data.find_all('a'):
            # Only anchors whose markup mentions "clear.gif" are counted as
            # search results.
            if 'clear.gif' not in str(anchor):
                continue
            # Let the parser extract the link text instead of removing
            # "<b>", "</a" etc. by hand, which left attribute remnants
            # (such as a stray quote) in the title.
            result_title = anchor.get_text().strip('|')
            print(str(cnt) + ":" + result_title)
            cnt += 1

        # Pause between pages: back-to-back requests reportedly stop
        # returning results.
        time.sleep(1)


#-------------------------------------------------
## main ###
#-------------------------------------------------
if __name__ == '__main__':
    _print_google_ranking()
    _print_yahoo_ranking()

タグ Python, YAHOO検索, スクレイピング, プログラミング

プログラム

Googleの検索結果スクレイピング

Googleの検索結果を取得して、内容を表示。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Fetch Google search results for a query
# and print the result titles as a numbered ranking.

import webbrowser as wb
import requests
from bs4 import BeautifulSoup

#-------------------------------------------------
## main ###
#-------------------------------------------------
if __name__ == '__main__':

    # Ask Google for roughly the top 100 hits in a single request (num=101).
    load_url = "https://www.google.co.jp/search?hl=ja&source=hp&q=携帯+格安&ie=utf-8&oe=utf-8&num=101"

    # Download and parse the result page. Without a timeout a stalled
    # connection would block the script forever.
    response = requests.get(load_url, timeout=10)
    web_data = BeautifulSoup(response.content, "html.parser")

    # Elements whose class attribute is exactly this string are treated as
    # result titles.
    # NOTE(review): these class names come from Google's markup and may
    # change at any time -- confirm they still match.
    title_tags = web_data.find_all(True, {'class': 'BNeawe vvjwJb AP7Wnd'})

    # Print the ranking as "<index>:<title>", starting at 0.
    for cnt, tag in enumerate(title_tags):
        # get_text() returns the element's text without any markup.
        # str.strip(chars) must not be used to peel off the surrounding
        # <div ...> tag: it removes a *set of characters*, not a prefix, and
        # therefore also eats leading letters of the title itself.
        print(str(cnt) + ":" + tag.get_text())

タグ Python, プログラミング

プログラム

Pythonで画像一括リサイズ

Wixサイトで書いてるブログをこちらに移転しようかと思います。新しい記事をこちらにも追加しておきます。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


import glob
import os.path
from PIL import Image


#-------------------------------------------------
## main ###
#-------------------------------------------------
if __name__ == '__main__':

    # Glob pattern selecting the files to process (direct matches only,
    # no recursion into sub-folders).
    # NOTE(review): "/*" matches the entries of the filesystem root and looks
    # like a placeholder -- point it at the real image folder.
    path = "/*"

    for file in glob.glob(path):
        # Split off the extension; only upper-case ".JPG" files are handled.
        root, ext = os.path.splitext(file)
        if ext != ".JPG":
            continue
        # Skip files this script produced earlier, otherwise every re-run
        # would resize its own outputs again ("x_256_256.JPG", ...).
        if root.endswith(("_256", "_25%")):
            continue

        # The context manager closes the underlying file as soon as both
        # copies are written, instead of leaking one handle per image.
        with Image.open(file) as img:
            # Fixed size (aspect ratio is not preserved).
            img.resize((256, 256)).save(root + "_256.JPG")
            # Quarter size with the aspect ratio preserved; clamp to 1 px so
            # images smaller than 4 px do not yield a zero-sized target.
            quarter_size = (max(1, img.width // 4), max(1, img.height // 4))
            img.resize(quarter_size).save(root + "_25%.JPG")

指定フォルダ直下の .JPG 画像をリサイズし、名前を変えて保存しています（256×256 の固定サイズと、縦横比を保った 1/4 サイズの 2 種類）。

タグ Python, プログラム