這個程式從每日成交資訊(TwStockList.csv)讀取股票代號,再從 Yahoo 奇摩股市下載各公司的基本資料,並存成 CSV 檔(TwStockListProfile.csv)。例如 1101 台泥的資料頁面為:https://tw.stock.yahoo.com/d/s/company_1101.html
附註:ETF、權證、特別股等沒有公司基本資料可下載,這些代號在輸出檔中會標記為「access fail」。
# -*- coding: utf-8 -*- """ Created on Sat Mar 11 17:11:09 2017
@author: ghosty """ import csv import ast import httplib2 from urllib.parse import urlencode from bs4 import BeautifulSoup import pandas as pd import datetime from datetime import timedelta
# Header row of the output CSV.  The order must match the list that
# getProfile() returns on success (25 columns).
ProfileTitle = (
    # identification
    ['股票代碼', '股票名稱', '產業類別']
    # dividends for ROC year 104
    + ['104年現金股利', '104年股票股利', '104年盈餘配股', '104年公積配股']
    # company dates
    + ['成立時間', '上市(櫃)時間']
    # people, capital and revenue mix
    + ['董事長', '總經理', '發言人', '股本', '營收比重']
    # profitability ratios and book value
    + ['營業毛利率', '營業利益率', '稅前淨利率', '資產報酬率', '股東權益報酬率', '每股淨值']
    # quarterly earnings per share
    + ['Y105Q3盈餘', 'Y105Q2盈餘', 'Y105Q1盈餘', 'Y104Q4盈餘']
    # sum of the first three quarters of ROC year 105
    + ['Y105前3季盈餘']
)
def _cell_text(rows, row, col):
    """Return the raw text of the ``col``-th <td> inside ``rows[row]``."""
    return rows[row].select('td')[col].text


def _strip_unit(text, unit):
    """Trim whitespace, then drop the ``unit`` marker (e.g. "元") from both ends.

    Whitespace is removed first so that the unit is still recognised when the
    cell text is padded with spaces or newlines.
    """
    return text.strip().strip(unit).strip()


def _remove_affix(text, affix):
    """Remove ``affix`` as a whole substring from the start/end of ``text``.

    ``str.strip(affix)`` would treat ``affix`` as a set of characters and could
    eat legitimate leading/trailing digits of the value, so the comparison is
    done on the complete substring instead.
    """
    text = text.strip()
    if text.startswith(affix):
        text = text[len(affix):]
    if text.endswith(affix):
        text = text[:-len(affix)]
    return text.strip()


def _roc_to_ad(text):
    """Convert a 'yyy/mm/dd' ROC-calendar date string to 'yyyy/mm/dd' (year + 1911)."""
    parts = text.strip().split("/")
    return '/'.join([str(int(parts[0]) + 1911), parts[1], parts[2]])


def getProfile(stockID,stockName):
    """Download and parse the Yahoo company-profile page of one stock.

    Args:
        stockID: Stock code as a string; it is spliced into the page URL.
        stockName: Stock name; copied unchanged into the result.

    Returns:
        On success, a list of 25 values in the same order as ``ProfileTitle``.
        If the expected tables/cells are missing or a value cannot be
        converted, ``[stockID, stockName, 'access fail']``.

    Raises:
        Whatever ``httplib2.Http.request`` raises: the download itself is not
        covered by the ``try`` block, only the parsing is.
    """
    url = 'https://tw.stock.yahoo.com/d/s/company_'+stockID+'.html'
    conn = httplib2.Http(cache=None)
    # The request identifies itself as Firefox on an Android phone.
    headers = {'Content-type': 'application/x-www-form-urlencoded',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
               'User-Agent':'Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0'}
    _resp, doc = conn.request(url, method='GET', body=None, headers=headers)
    soup = BeautifulSoup(doc, 'html.parser')

    try:
        # Locate each table through a label it contains, then climb three
        # levels up from the text node to reach the enclosing table element.
        table1 = soup.findAll(text='基 本 資 料')[0].parent.parent.parent
        table2 = soup.findAll(text='營業毛利率')[0].parent.parent.parent
        rows1 = table1.select('tr')
        rows2 = table2.select('tr')

        # --- table 1: basic company data ---
        category = _cell_text(rows1, 1, 1).strip()
        Y104cashshare = _strip_unit(_cell_text(rows1, 1, 3), "元")
        Y104stockshare = _strip_unit(_cell_text(rows1, 2, 3), "元")
        Y104earnshare = _strip_unit(_cell_text(rows1, 3, 3), "元")
        Y104remainshare = _strip_unit(_cell_text(rows1, 4, 3), "元")
        setupDate = _roc_to_ad(_cell_text(rows1, 2, 1))
        onboardDate = _roc_to_ad(_cell_text(rows1, 3, 1))
        chairman = _cell_text(rows1, 4, 1).strip()
        manager = _cell_text(rows1, 5, 1).strip()
        speaker = _cell_text(rows1, 6, 1).strip()
        capital = _strip_unit(_cell_text(rows1, 7, 1), "億")
        product = _remove_affix(_cell_text(rows1, 10, 1), '(2015年)')

        # --- table 2: ratios and quarterly earnings ---
        grossprofit = _cell_text(rows2, 1, 1).strip()
        netprofit = _cell_text(rows2, 2, 1).strip()
        taxprofit = _cell_text(rows2, 3, 1).strip()
        rate = _cell_text(rows2, 4, 1).strip()
        Y105Q3 = _strip_unit(_cell_text(rows2, 1, 3), "元")
        Y105Q2 = _strip_unit(_cell_text(rows2, 2, 3), "元")
        Y105Q1 = _strip_unit(_cell_text(rows2, 3, 3), "元")
        Y104Q4 = _strip_unit(_cell_text(rows2, 4, 3), "元")
        earn = _cell_text(rows2, 5, 1).strip()
        # Drop the "每股淨值:" label characters first, then the trailing unit.
        netvalue = _strip_unit(_cell_text(rows2, 5, 2).strip().strip("每股淨值:"), "元")

        # Sum of the three available quarters; a non-numeric cell raises
        # ValueError and the stock is reported as 'access fail'.
        yearEarn = float(Y105Q3) + float(Y105Q2) + float(Y105Q1)

        result = [stockID, stockName, category,
                  Y104cashshare, Y104stockshare, Y104earnshare, Y104remainshare,
                  setupDate, onboardDate,
                  chairman, manager, speaker, capital, product,
                  grossprofit, netprofit, taxprofit, rate, earn, netvalue,
                  Y105Q3, Y105Q2, Y105Q1, Y104Q4,
                  "{:0.2f}".format(yearEarn)]
    except Exception:
        # Best effort: any parsing problem marks this stock as failed so the
        # batch continues.  KeyboardInterrupt/SystemExit are deliberately not
        # caught, so the run can still be aborted.
        result = [stockID, stockName, 'access fail']

    return result
#main
# Reads stock codes/names from TwStockList.csv, fetches the profile of each
# one via getProfile(), and writes all rows to TwStockListProfile.csv.
startTime = datetime.datetime.now()

listProfile = [ProfileTitle]
# newline='' is what the csv module asks for when handing it a file object;
# the with-block guarantees the input file is closed again.
with open('TwStockList.csv', newline='') as csvfile:
    stockList = csv.reader(csvfile, delimiter=',')
    next(stockList, None)  # skip header line
    for row in stockList:
        if len(row) < 2:
            # Blank or truncated line: there is no code/name pair to look up.
            continue
        result = getProfile(row[0], row[1])
        print(result)
        listProfile.append(result)

#save result
with open("TwStockListProfile.csv", "w") as f:
    w = csv.writer(f, lineterminator='\n')
    w.writerows(listProfile)

#performance calculation
stopTime = datetime.datetime.now()
elapsedTime = stopTime - startTime
print('start time=',startTime)
print('stop time=',stopTime)
print('elapsed =',elapsedTime)
留言列表