這個程式從每日成交資訊讀取股票代號，然後從 Yahoo 下載公司基本資料，存成 CSV 檔。例如 1101 台泥：https://tw.stock.yahoo.com/d/s/company_1101.html

附註：ETF、權證、特別股等沒有公司基本資料可下載。

# -*- coding: utf-8 -*-

"""

Created on Sat Mar 11 17:11:09 2017

 

@author: ghosty

"""

import csv

import ast

import httplib2

from urllib.parse import urlencode

from bs4 import BeautifulSoup

import pandas as pd

import datetime

from datetime import timedelta

                 

# Header row of the output CSV. The order matches, field for field, the
# 25-element list that getProfile() builds for a successfully parsed page.
ProfileTitle = [
    # identity
    '股票代碼',
    '股票名稱',
    '產業類別',
    # dividend columns (the "104" prefix is presumably ROC year 104)
    '104年現金股利',
    '104年股票股利',
    '104年盈餘配股',
    '104年公積配股',
    # company dates
    '成立時間',
    '上市()時間',
    # management, capital and product mix
    '董事長',
    '總經理',
    '發言人',
    '股本',
    '營收比重',
    # profitability ratios
    '營業毛利率',
    '營業利益率',
    '稅前淨利率',
    '資產報酬率',
    '股東權益報酬率',
    '每股淨值',
    # per-quarter earnings
    'Y105Q3盈餘',
    'Y105Q2盈餘',
    'Y105Q1盈餘',
    'Y104Q4盈餘',
    # sum of the three Y105 quarters above
    'Y1053季盈餘',
]

                

def _cell_text(rows, row, col):
    """Return the unmodified text of <td> number ``col`` in table row ``row``."""
    return rows[row].select('td')[col].text


def _roc_to_gregorian(roc_date):
    """Convert a 'yyy/mm/dd' string to 'yyyy/mm/dd' by adding 1911 to the year.

    Raises ValueError if the year is not an integer and IndexError if the
    string has fewer than three '/'-separated parts.
    """
    parts = roc_date.strip().split("/")
    return str(int(parts[0]) + 1911) + '/' + parts[1] + '/' + parts[2]


def _drop_token(text, token):
    """Remove the exact ``token`` from the start and/or the end of ``text``.

    Unlike ``str.strip(token)``, which treats its argument as a *set of
    characters*, this never removes neighbouring characters that merely
    happen to appear in ``token`` (e.g. a leading '5' or a trailing ')').
    """
    if text.startswith(token):
        text = text[len(token):]
    if text.endswith(token):
        text = text[:-len(token)]
    return text


def getProfile(stockID,stockName):
    """Download and parse the Yahoo company-profile page of one stock.

    Parameters:
        stockID:   stock code as a string; it is concatenated into the URL.
        stockName: stock name; copied into the result unchanged.

    Returns:
        On success, a 25-element list in the same order as ``ProfileTitle``.
        On any failure while fetching or parsing the page, the 3-element
        list ``[stockID, stockName, 'access fail']``.
    """
    url = 'https://tw.stock.yahoo.com/d/s/company_'+stockID+'.html'
    # The request identifies itself as Firefox on an Android phone.
    headers = {'Content-type': 'application/x-www-form-urlencoded',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
               'User-Agent':'Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0'}

    try:
        # The download lives inside the try block so that a network error on
        # one stock produces an 'access fail' row instead of aborting the run.
        conn = httplib2.Http(cache=None)
        _resp, doc = conn.request(url, method='GET', body=None, headers=headers)
        soup = BeautifulSoup(doc, 'html.parser')

        # Each table is located through a text node it contains, then by
        # walking three parents up; the <tr> lists are selected only once.
        # NOTE(review): the first anchor is a single space, which looks like
        # text lost in transcription -- confirm against the live page.
        rows1 = soup.findAll(text=' ')[0].parent.parent.parent.select('tr')
        rows2 = soup.findAll(text='營業毛利率')[0].parent.parent.parent.select('tr')

        category = _cell_text(rows1, 1, 1).strip()

        # NOTE(review): the original applied strip("") to these cells, which
        # removes nothing, so the raw cell text is kept as-is. A unit
        # character may have been lost from that call -- confirm.
        Y104cashshare = _cell_text(rows1, 1, 3)
        Y104stockshare = _cell_text(rows1, 2, 3)
        Y104earnshare = _cell_text(rows1, 3, 3)
        Y104remainshare = _cell_text(rows1, 4, 3)

        setupDate = _roc_to_gregorian(_cell_text(rows1, 2, 1))
        onboardDate = _roc_to_gregorian(_cell_text(rows1, 3, 1))

        chairman = _cell_text(rows1, 4, 1).strip()
        manager = _cell_text(rows1, 5, 1).strip()
        speaker = _cell_text(rows1, 6, 1).strip()
        capital = _cell_text(rows1, 7, 1)  # raw text, see note above

        # Remove only the literal "(2015)" marker, never surrounding digits.
        product = _drop_token(_cell_text(rows1, 10, 1).strip(), '(2015)').strip()

        grossprofit = _cell_text(rows2, 1, 1).strip()
        netprofit = _cell_text(rows2, 2, 1).strip()
        taxprofit = _cell_text(rows2, 3, 1).strip()
        rate = _cell_text(rows2, 4, 1).strip()
        earn = _cell_text(rows2, 5, 1).strip()

        Y105Q3 = _cell_text(rows2, 1, 3).strip()
        Y105Q2 = _cell_text(rows2, 2, 3).strip()
        Y105Q1 = _cell_text(rows2, 3, 3).strip()
        Y104Q4 = _cell_text(rows2, 4, 3).strip()

        # Character-set strip kept from the original: it peels the label
        # characters off the ends of the cell before whitespace is trimmed.
        netvalue = _cell_text(rows2, 5, 2).strip("每股淨值:").strip()

        # The quarter cells must be plain numeric literals; anything else
        # raises here and the stock is reported as 'access fail'.
        yearEarn = (ast.literal_eval(Y105Q3)
                    + ast.literal_eval(Y105Q2)
                    + ast.literal_eval(Y105Q1))

        return [stockID, stockName, category,
                Y104cashshare, Y104stockshare, Y104earnshare, Y104remainshare,
                setupDate, onboardDate,
                chairman, manager, speaker, capital, product,
                grossprofit, netprofit, taxprofit, rate, earn, netvalue,
                Y105Q3, Y105Q2, Y105Q1, Y104Q4,
                "{:0.2f}".format(yearEarn)]
    except Exception:
        # Best-effort scraping: pages without a profile (or with another
        # layout) are expected. Exception rather than a bare except, so that
        # Ctrl-C and SystemExit still stop the script.
        return [stockID, stockName, 'access fail']

   

#main
# Reads stock codes and names from TwStockList.csv, fetches one profile row
# per stock with getProfile(), writes all rows to TwStockListProfile.csv and
# prints the elapsed wall-clock time.
startTime = datetime.datetime.now()

listProfile = [ProfileTitle]

# newline='' is what the csv module expects for its input files; the with
# block closes the file even if a row fails.
with open('TwStockList.csv', newline='') as csvfile:
    stockList = csv.reader(csvfile, delimiter=',')
    next(stockList, None)  # skip header line
    for row in stockList:
        if len(row) < 2:
            # Blank or truncated lines carry no code/name pair.
            continue
        result = getProfile(row[0], row[1])
        print(result)
        listProfile.append(result)
        #break  #test once

#save result
with open("TwStockListProfile.csv", "w") as f:
    w = csv.writer(f, lineterminator='\n')
    w.writerows(listProfile)

#performance calculation
stopTime = datetime.datetime.now()
elapsedTime = stopTime - startTime
print('start time=',startTime)
print('stop  time=',stopTime)
print('elapsed =',elapsedTime)

 

 

 

arrow
arrow
    文章標籤
    python
    全站熱搜
    創作者介紹
    創作者 ghostyguo 的頭像
    ghostyguo

    No More Codes

    ghostyguo 發表在 痞客邦 留言(0) 人氣()