python自动爬取api数据并存入mysql
- 共 679 次检阅

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import urllib.request
import json
import jsonpath
import pymysql
import ssl
import time

# 格式化成2016-03-20 11:45:39形式
tys = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
ssl._create_default_https_context = ssl._create_unverified_context
pages = 430;
conn = pymysql.connect('127.0.0.1', 'lichuandata', 'FizSBL3C8iKLAtH7', 'lichuandata')
cursor = conn.cursor()
sqls = "truncate table link_dian"
sqlss = "truncate table link_dian"
cursor.execute(sqls)
cursor.execute(sqlss)
while pages <= 550:
    url = "https://api.masaike.com/Bm.Tingdian/info?tingdian_id={}".format(pages)
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'}
    request = urllib.request.Request(url, headers = headers)
    response = urllib.request.urlopen(request)
    #  取出json文件里的内容,返回的格式是字符串
    html =  response.read()
    # 把json形式的字符串转换成python形式的Unicode字符串
    unicodestr = json.loads(html)
    # Python形式的列表
    td_a = jsonpath.jsonpath(unicodestr, "$..tingdian_id")
    td_b = jsonpath.jsonpath(unicodestr, "$..typecode")
    td_c = jsonpath.jsonpath(unicodestr, "$..linename")
    td_d = jsonpath.jsonpath(unicodestr, "$..poweroffid")
    td_e = jsonpath.jsonpath(unicodestr, "$..starttime")
    td_f = jsonpath.jsonpath(unicodestr, "$..stopdate")
    td_g = jsonpath.jsonpath(unicodestr, "$..scope")
    td_h = jsonpath.jsonpath(unicodestr, "$..poweroffreason")
    td_i = jsonpath.jsonpath(unicodestr, "$..powercomm")
    td_j = jsonpath.jsonpath(unicodestr, "$..pubtranname")
    #for item in city_list:
    #    print item
    # dumps()默认中文为ascii编码格式,ensure_ascii默认为Ture
    # 禁用ascii编码格式,返回的Unicode字符串,方便使用
    for a, b, c, d, e, f, g, h, i, j in zip(td_a, td_b, td_c, td_d, td_e, td_f, td_g, td_h, td_i, td_j):
        if i == '无':
            i = "停电信息已过期"
        elif e >= tys:
            i = "未到停电开始时间"
        elif f <= tys:
            i = "停电信息已过期"
        elif e < tys or f > tys:
            i = "停电进行时"
        else:
            g = i
        k = ''
        l = ''
        m = ''
        data = {
            'ID': a,
            '名称': b,
            '线路': c,
            '停电编号': d,
            '开始时间': e,
            '结束时间': f,
            '停电范围': g,
            '停电原因': h,
            '停电状态': i,
            '停电台区': j,
        }
        if f >= tys:
            print(data)
            sql = "INSERT IGNORE INTO link_dian values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            value = (a, b, c, d, e, f, g, h, i, j, k, l, m)
            cursor.execute(sql, value)
            conn.commit()
        else:
            print("---------------------------------------------------------")
            print(data)
            sql = "INSERT IGNORE INTO link_dianover values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            value = (a, b, c, d, e, f, g, h, i, j, k, l, m)
            cursor.execute(sql, value)
            conn.commit()

    pages = pages + 1

 

分享到:

这篇文章还没有评论

发表评论