Python 爬蟲

      在〈Python 爬蟲〉中尚無留言

主程式

import mysql.connector as mysql
from G import G
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import time
from bs4 import BeautifulSoup

from selenium.webdriver.support.select import Select#下拉選單
from selenium.webdriver.support.wait import WebDriverWait#等待
from selenium.webdriver.support import expected_conditions as EC#事件
from selenium.webdriver.common.by import By


def transform_date(date):
    """Convert a 'Y/M/D' date in the ROC (Minguo) calendar to the Gregorian year.

    The year part is parsed as an integer and shifted by 1911; the month and
    day parts are passed through unchanged as text.

    Raises:
        ValueError: if ``date`` does not split into exactly three '/'-separated
            parts, or the year part is not an integer.
    """
    roc_year, month, day = date.split('/')
    gregorian_year = int(roc_year) + 1911
    return '/'.join((str(gregorian_year), month, day))

def getValue(yy,mm):
    """Scrape one month of the TWSE MI_5MINS_HIST table into the ``stock`` table.

    Loads the page in the module-level ``web`` driver, selects ``yy`` and
    ``mm`` in the drop-downs of the same names, clicks the query button and
    reads every ``<tr>`` that has exactly five ``<td>`` cells: the first cell
    is the date, the remaining four are parsed as numbers. The rows are then
    inserted through the module-level ``cursor`` / ``conn`` as
    (sk_date, sk_open, sk_high, sk_low, sk_close).

    Args:
        yy: Option value to pick in the ``yy`` drop-down (the caller passes
            e.g. 2010).
        mm: Option value to pick in the ``mm`` drop-down (the caller passes
            e.g. 2).

    Returns:
        None. A database error is reported on stdout and otherwise ignored,
        so one failing month does not stop the run.
    """
    # Imported locally so this function adds no new file-level dependency.
    from selenium.common.exceptions import TimeoutException

    web.get('https://www.twse.com.tw/zh/page/trading/indices/MI_5MINS_HIST.html')

    # find_element(By.…) replaces the find_element_by_* helpers, which no
    # longer exist in Selenium 4.
    Select(web.find_element(By.NAME, 'yy')).select_by_value(f'{yy}')
    Select(web.find_element(By.NAME, 'mm')).select_by_value(f'{mm}')
    web.find_element(By.CLASS_NAME, 'button').click()

    try:
        # The locator has to be passed as ONE (by, value) tuple;
        # presence_of_element_located(By.ID, 'td') does not work.
        # NOTE(review): this looks for an element whose *id* is 'td'; if the
        # page has none, the call just times out after 1 s and acts as a short
        # pause. Presumably By.TAG_NAME was intended - confirm against the
        # page before changing it, since an early match could scrape the
        # table before the result has loaded.
        WebDriverWait(web, 1).until(EC.presence_of_element_located((By.ID, 'td')))
    except TimeoutException:
        # Best effort: parse whatever is on the page after the wait.
        pass

    stocks = {}  # Gregorian date string -> [open, high, low, close]
    for tr in web.find_elements(By.TAG_NAME, 'tr'):
        tds = tr.find_elements(By.TAG_NAME, 'td')
        if len(tds) != 5:
            continue
        try:
            sk_date = transform_date(tds[0].text)
            prices = [float(td.text.replace(',', '')) for td in tds[1:]]
        except ValueError:
            # Not a "date + four numbers" row; skip it instead of aborting
            # the whole month.
            continue
        stocks[sk_date] = prices

    # With nothing scraped there is nothing to insert (building the statement
    # anyway would yield invalid SQL).
    if stocks:
        # Parameterized statement: the driver does the quoting, so scraped
        # text is never spliced into the SQL string.
        insert_sql = (
            "insert into stock (sk_date,sk_open,sk_high,sk_low,sk_close) "
            "values (%s,%s,%s,%s,%s)"
        )
        rows = [(sk_date, *prices) for sk_date, prices in stocks.items()]
        try:
            cursor.executemany(insert_sql, rows)
            conn.commit()
        except mysql.Error as err:
            # Still best effort, but the failure is no longer invisible.
            print(f'insert for {yy}/{mm} failed: {err}')

    # Pause before returning - presumably to space out requests to the site.
    time.sleep(5)

        #for td in tr.find_elements_by_tag_name('td'):
        #    #print(f'{td.text} ',end='')
        #    s=td.text.replace(',','')
        #    print(f'{s}',end=' ')
        #print()


# Script entry: open the MySQL connection and a Chrome session, scrape
# February and March 2010, then release everything. The names ``conn``,
# ``cursor`` and ``web`` are module-level because getValue() reads them as
# globals. The nested try/finally blocks make sure each resource is released
# even when a later step raises.
conn = mysql.connect(host=G.ip, user=G.account, password=G.password, database=G.db)
try:
    cursor = conn.cursor()
    try:
        options = Options()
        # options.add_argument('--headless')  # headless browser (no window)
        options.add_argument('--disable-gpu')  # do not use the GPU in the browser
        web = webdriver.Chrome(options=options)
        # web=webdriver.Chrome(executable_path="C:/secde/python/chromedriver.exe",options=options)
        try:
            getValue(2010, 2)
            getValue(2010, 3)
        finally:
            # quit() ends the whole WebDriver session and the driver process;
            # close() would only close the current window.
            web.quit()
    finally:
        cursor.close()
finally:
    conn.close()

帳號密碼

class G:
    """Database connection settings, read as class attributes (``G.ip`` etc.)."""

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or an untracked config file.
    ip = 'localhost'      # MySQL host
    account = 'student'   # MySQL user name
    password = '1234'     # MySQL password
    db = 'cloud'          # database (schema) name

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *