|
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 23 21:36:03 2020
@author: Administrator

Incrementally load tushare daily stock quotes into the MySQL table
`stock_daily_temp`.

Original author's note: the data can be updated incrementally (around 4 pm
each day), so the script does not have to be run at exactly 4 pm every day.
Each run loads every open trading day that is later than the newest
`trade_date` already stored in the table.
"""
import time
from urllib.parse import quote_plus

import pandas as pd
import tushare as ts
from sqlalchemy import create_engine

# NOTE(review): the API token and the database credentials are hard-coded in
# the source; consider moving them to environment variables or a config file.
pro = ts.pro_api('7322d9b3de6e6f0a650c5f3a70ca121cb35006a52a1c8c46ee7f0c63')

# The password contains '@', which must be percent-encoded inside a database
# URL; otherwise the URL parser splits user/password/host at the wrong '@'.
engine3 = create_engine(
    "mysql+pymysql://{}:{}@{}/{}?charset={}".format(
        'root', quote_plus('root@123'), 'localhost:3306', 'fund', 'utf8'
    )
)

# Start date used when the table does not exist yet or holds no rows.
DEFAULT_START = '20201010'

# Start of the update window: newest trade_date already in the database.
try:
    latest = pd.read_sql(
        sql='SELECT MAX( `trade_date`) as sta FROM `stock_daily_temp`',
        con=engine3,
    ).iloc[0, 0]
except Exception as exc:
    # Best-effort fallback kept from the original (e.g. first run, table
    # missing), but no longer swallows KeyboardInterrupt/SystemExit.
    print('无法读取库内最新交易日,使用默认起始日期', DEFAULT_START, exc)
    sta = DEFAULT_START
else:
    # MAX() over an empty table yields NULL; treat it like a first run
    # instead of letting `end > sta` fail on None.
    sta = DEFAULT_START if pd.isna(latest) else latest

# End of the update window: newest trade date the API reports for a
# reference stock.
temp1 = pro.daily(ts_code='000001.SZ', start_date='20201010')
end = max(temp1['trade_date'])

# Open trading days strictly after `sta`, oldest first. Filtering explicitly
# (instead of dropping the first calendar row) stays correct when `sta` is
# not itself a trading day and regardless of the order the API returns.
OpenList = pro.trade_cal(start_date=sta, end_date=end)
kk = sorted(
    day for day in OpenList[OpenList.is_open == 1]['cal_date'] if day > sta
)

if end > sta:
    # Fetch one day at a time and concatenate once at the end.
    frames = []
    for i in kk:
        print(i)
        df = pro.daily(trade_date=i)
        if not df.empty:
            frames.append(df)
        time.sleep(2)  # pause between API calls

    if frames:
        te = pd.concat(frames, ignore_index=True)
        # '000001.SZ' -> '000001'
        te['code'] = te['ts_code'].str.split('.', expand=True)[0]
        te['end_pct_chg'] = te['pct_chg']
        # Original note: the amount needs to be multiplied by 1000 so that
        # it is stored in yuan.
        te['amount'] = te['amount'] * 1000
        # engine.begin() commits on success, rolls back on error and always
        # releases the connection.
        with engine3.begin() as con3:
            te.to_sql(name='stock_daily_temp', con=con3,
                      if_exists='append', index=False)
        print('当天数据已经更新')
    else:
        print('没有获取到新的日线数据: ', sta, end)
else:
    print('日线数据已经是最新,库 - 外网: ', sta, end)
复制代码
爬虫效果:
|
|