這篇已經過期失效, 新版的在這裡

氣象局網站的 地震活動彙整 列表, 檢視網頁內容, 真正資料網頁連結為 https://scweb.cwb.gov.tw/Page.aspx/?ItemId=20&loc=tw&adv=1, 因為是ASP網頁, 所以需先取出ASP的傳遞參數, 資料表格之 class 為 datalist4, 使用 BeautifulSoup 即可取出表格內容

# -*- coding: utf-8 -*-

"""

Created on Sun Oct  15 11:53:25 2018

@author: ghosty

@Program: cwbweb_list.py

@Purpose: download quake data from CWB web source: https://www.cwb.gov.tw/V7/earthquake/rtd_eq.htm

"""

 

import requests

from bs4 import BeautifulSoup

#import dateutil

  

def _hidden_field_value(soup, field_id):
    """Return the ``value`` attribute of the ``<input>`` whose id is *field_id*.

    Returns an empty string when the input is absent or has no ``value``
    attribute, so a missing hidden field does not abort the whole download.
    """
    tag = soup.find('input', id=field_id)
    if tag is None:
        return ''
    return tag.get('value', '')


def downloadCWBweb(year, month, timeout=30):
    """Download the CWB earthquake list for one month.

    The page is an ASP.NET form, so two requests are made: the first one
    fetches the page to read its hidden form fields, the second one posts
    those fields back together with the requested year and month. The rows
    of the result table (class ``datalist4``) are then extracted.

    Args:
        year: Four-digit year, as an int (formatted with ``{:4d}``).
        month: Month number, as an int (zero-padded to two digits).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of rows, each row a list of the non-empty, stripped cell
        texts of one ``<tr>``. Rows without any ``<td>`` are skipped.
        Returns ``None`` (after printing a message) when a request does not
        answer with HTTP 200 or the result table is not present.

    Raises:
        requests.RequestException: network errors and timeouts propagate.
    """
    url = 'https://scweb.cwb.gov.tw/Page.aspx/?ItemId=20&loc=tw&adv=1'
    agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'
    headers = {
        'Content-type': 'application/x-www-form-urlencoded',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': agent,
    }

    # First request: only used to obtain the hidden form fields.
    response1 = requests.post(url, timeout=timeout)
    if response1.status_code != requests.codes.ok:
        print("CWB requesr fail")
        return None

    soup = BeautifulSoup(response1.text, "lxml")

    # Hidden fields are echoed back under exactly the names they were read
    # from; a misspelled key would leave the real field unsent.
    hidden_fields = (
        '__VIEWSTATE',
        '__VIEWSTATEGENERATOR',
        '__VIEWSTATEENCRYPTED',
        '__EVENTVALIDATION',
    )
    payload = {name: _hidden_field_value(soup, name) for name in hidden_fields}

    # Search controls are posted under their '$'-separated keys.
    payload['ctl03$ddlYear'] = "{:4d}".format(year)
    payload['ctl03$ddlMonth'] = "{:0>2d}".format(month)
    payload['ctl03$btnSearch'] = ''

    # Second request: submit the form for the requested month.
    response2 = requests.post(url, data=payload, headers=headers, timeout=timeout)
    if response2.status_code != requests.codes.ok:
        print("CWB requesr fail")
        return None

    soup2 = BeautifulSoup(response2.text, "lxml")
    table = soup2.find('table', attrs={'class': 'datalist4'})
    if table is None:
        # Without this guard the row lookup below fails with AttributeError.
        print("CWB data table not found")
        return None

    quakeData = []
    for row in table.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:  # skip rows that have no <td> cells
            quakeData.append([text for text in cells if text])  # drop empty values

    return quakeData

 

# Example run: fetch the list for August 2018 and print every row.
quakeData = downloadCWBweb(2018,8)

# downloadCWBweb returns None when the request fails; iterating None would
# raise TypeError, so only print when data was actually returned.
if quakeData is not None:
    for data in quakeData:
        print(data)

 

arrow
arrow
    全站熱搜

    ghostyguo 發表在 痞客邦 留言(0) 人氣()