為了分析地震地點的地理名稱, 使用Python語言, 從USGS下載地震資料存成文字檔. 因 USGS 使用逗點作為分隔, 因此這裡使用 $ 錢號來做輸出資料區隔, 以避免資料混淆, 亦可匯入 excel 進行整理. 輸出資料有兩個欄位, 第一個是原始地震資料來源, 第二個是擷取的地點名稱, 打算做連續地點關聯性分析, 以及加上自動翻譯為中文, 作為資料與動畫分析時自動匯入使用
# -*- coding: utf-8 -*- """ Created on Sun Oct 7 11:53:25 2018 @author: ghosty @Program: usus_location_list.py @Purpose: download USGS data and analyze the quake location name for (1) location relation analysis (2) translation from English to Traditional Chinese, used for AI analysis tool development """
import csv import requests import os.path import datetime #import dateutil from dateutil.relativedelta import relativedelta
#init global variables locFileName = 'usgs_loc.txt' locationList=[] quakeList=[] #
def downloadUSGS(starttime, endtime, minmagnitude, maxmagnitude):  #,latitude=121.1,longitude=23.5,maxradiuskm=20000):
    """Download earthquake records from the USGS FDSN event service.

    Parameters:
        starttime, endtime: dates (or ISO strings) bounding the query window.
        minmagnitude, maxmagnitude: magnitude bounds for the query.

    Returns:
        (records, header): records is a list of CSV field lists (header row
        removed); header is the CSV header row. Both are empty lists when
        the HTTP request fails, so the caller's tuple unpacking still works.
    """
    url = 'http://earthquake.usgs.gov/fdsnws/event/1/query'
    query = {
        'format': 'csv',
        'starttime': starttime,
        'endtime': endtime,
        # BUG FIX: the magnitude bounds were hard-coded to '5', silently
        # ignoring the caller-supplied parameters; pass them through instead.
        'minmagnitude': minmagnitude,
        'maxmagnitude': maxmagnitude,
        'eventtype': 'earthquake',
    }
    response = requests.get(url, params=query)
    if response.status_code != requests.codes.ok:
        print("USGS request fail")  # fixed typo ("requesr")
        # BUG FIX: returning None crashed the caller's
        # "quakeDATA, header = downloadUSGS(...)" unpacking.
        return [], []
    #print("encoding: %s" % response.encoding)
    rows = list(csv.reader(response.text.split('\n'), delimiter=','))
    if not rows:  # defensive: empty response body
        return [], []
    # header: time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,
    # id,updated,place,type,horizontalError,depthError,magError,magNst,status,
    # locationSource,magSource
    header = rows[0]
    rows.pop(0)  # remove header line
    print('USGS download #', len(rows))
    return rows, header
def addLocation(quakeData):
    """Extract new place names from USGS quake records.

    For each record, splits the free-text "place" field (column 13) on
    commas, strips a leading "<distance> of " prefix and the word "the ",
    and appends any unseen name to the module-level locationList, with the
    formatted source record appended to the parallel module-level quakeList.

    Parameters:
        quakeData: list of CSV field lists as returned by downloadUSGS().
    """
    for quake in quakeData:
        #print('quake:', quake)
        if len(quake) < 14:  # skip blank/truncated records (place is field 13)
            continue
        quakeTime = quake[0]
        quakeLatitude = quake[1]
        quakeLongitude = quake[2]
        quakeDepth = quake[3]
        quakeMag = quake[4]
        quakeMagType = quake[5]
        quakePlace = quake[13]
        locs = quakePlace.split(',')
        for loc in locs:
            try:
                # Match " of " (with spaces), not bare "of", to avoid
                # clipping names that merely contain those letters.
                ofPos = loc.find(" of ")
                if ofPos > 0:
                    loc = loc[ofPos + 4:]
                # BUG FIX: str.replace returns a new string; the original
                # discarded the result, so "the " was never removed.
                loc = loc.replace("the ", "")
                # lstrip replaces the manual while-loop that raised
                # IndexError on an empty string.
                loc = loc.lstrip()
                if loc and loc not in locationList:
                    quake = '{0} {1} {2} {3} {4} {5} {6}'.format(
                        quakeTime, quakeLatitude, quakeLongitude,
                        quakeDepth, quakeMag, quakeMagType, quakePlace)
                    print("Add [", loc, '] from [', quake, ']')
                    quakeList.append(quake)
                    locationList.append(loc)
                    #print(len(quakeList), len(locationList))
            except IndexError:  # narrowed from bare except: source data error
                print('parse fail: ', quakeTime, quakeLatitude, quakeLongitude,
                      quakeDepth, quakeMag, quakeMagType, quakePlace)
def saveLocation(locationList, locFileName):
    """Export quake/location pairs as "quake"$"location" lines.

    '$' is used as the field separator because USGS records themselves
    contain commas. NOTE(review): relies on the module-level quakeList
    staying parallel to locationList — confirm addLocation/loadLocation
    are the only writers.

    Parameters:
        locationList: the extracted place names to export.
        locFileName: path of the text file to (over)write.
    """
    print("export ", len(locationList), " locations")
    # Explicit UTF-8: place names may contain non-ASCII characters, and the
    # platform default encoding could otherwise fail on write.
    with open(locFileName, 'w', encoding='utf-8') as locfile:
        # zip replaces the index loop and cannot IndexError if the two
        # lists ever get out of step (the shorter one bounds the output).
        for quake, loc in zip(quakeList, locationList):
            locfile.write('\"' + str(quake) + '\"$\"' + str(loc) + '\"\n')
def loadLocation(locationList, locFileName):
    """Import previously exported "quake"$"location" lines.

    Appends the quake half to the module-level quakeList and the location
    half to the locationList argument, keeping the two lists parallel.
    Missing file is not an error (first run starts empty).

    Parameters:
        locationList: list to receive the place names (mutated in place).
        locFileName: path of the export file produced by saveLocation().
    """
    if os.path.exists(locFileName):
        # Explicit UTF-8 to match saveLocation's writer.
        with open(locFileName, 'r', encoding='utf-8') as locfile:
            for line in locfile:
                quakeData = line.strip().split('$')
                if len(quakeData) < 2:
                    # BUG FIX: a malformed line (no '$') previously raised
                    # IndexError and aborted the whole import.
                    continue
                quakeList.append(quakeData[0].strip('\"'))
                locationList.append(quakeData[1].strip('\"'))
    print("import ", len(locationList), " locations")
# ------ main() --------

# Resume from any previously exported location data.
loadLocation(locationList, locFileName)

# Sweep the whole USGS catalogue from 1900 up to today, one year per query.
# If a single download returns too many records, shrink the step, e.g.
# relativedelta(years=1) before 2018 and relativedelta(months=1) after.
cursor = datetime.date(1900, 1, 1)
today = datetime.date.today()
step = relativedelta(years=1)

while cursor < today:
    window_end = cursor + step
    print("\nProcessing...", cursor.strftime("%Y-%m-%d"), "~", window_end.strftime("%Y-%m-%d"))
    records, header = downloadUSGS(cursor, window_end, 4.0, 10)
    addLocation(records)
    cursor = window_end

# save processed result
saveLocation(locationList, locFileName)
留言列表