# Web Scraper

A small scraper that pages through the job listings on masaike.com, parses each posting with BeautifulSoup, and writes the results into a local MySQL database via pymysql.
```python
import requests
from bs4 import BeautifulSoup
import pymysql


def download_page(http_url):
    # Fetch a listings page, sending a browser User-Agent so the
    # request is less likely to be rejected as a bot.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
    response = requests.get(http_url, headers=headers, timeout=10)
    return response.text


def get_page_content(html, page):
    soup = BeautifulSoup(html, 'html.parser')
    con = soup.find('div', class_='plist')
    if con is None:
        # No listing container on this page (e.g. past the last page).
        return
    # Recent pymysql versions require keyword arguments here.
    conn = pymysql.connect(host='127.0.0.1', user='ceshi',
                           password='ceshi', database='ceshi',
                           charset='utf8mb4')
    cursor = conn.cursor()
    con_list = con.find_all('div', class_='pl')
    for item in con_list:
        # Job title, company, salary, and posting date of one listing.
        a11 = item.find('div', class_='td-j-name').find('a').get_text()
        a22 = item.find('div', class_='td3 link_gray6').find('a').get_text()
        a33 = item.find('div', class_='td4').get_text()
        a55 = item.find('div', class_='td5').get_text()
        print(a11, a22, a33, a55)
        sql = 'INSERT INTO a123(zhiweis, gongsi, xinzhi, shijian) VALUES (%s, %s, %s, %s)'
        cursor.execute(sql, (a11, a22, a33, a55))
    conn.commit()  # one commit per page
    conn.close()   # release the connection instead of leaking one per page


def main():
    # Walk the paginated job list.
    for i in range(1, 1000):
        http_url = 'http://www.masaike.com/jobs/jobs_list/page/{}.htm'.format(i)
        html = download_page(http_url)
        get_page_content(html, i)


if __name__ == '__main__':
    main()
```
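
The script writes into an `a123` table that it assumes already exists in the `ceshi` database. The post doesn't show the schema, so here is a minimal sketch for creating it; the column names come from the `INSERT` statement above, while the types and lengths are assumptions:

```python
import pymysql

# Hypothetical DDL for the a123 table the scraper fills; only the
# column names are taken from the script, the types are guesses.
DDL = """
CREATE TABLE IF NOT EXISTS a123 (
    id      INT AUTO_INCREMENT PRIMARY KEY,
    zhiweis VARCHAR(255),  -- job title
    gongsi  VARCHAR(255),  -- company
    xinzhi  VARCHAR(64),   -- salary
    shijian VARCHAR(64)    -- posting date
) DEFAULT CHARSET = utf8mb4
"""

conn = pymysql.connect(host='127.0.0.1', user='ceshi',
                       password='ceshi', database='ceshi')
with conn.cursor() as cursor:
    cursor.execute(DDL)
conn.commit()
conn.close()
```

Opening and closing one connection per page, as the scraper does, is simple but wasteful over hundreds of pages; an alternative is to open a single connection in `main()` and pass it into `get_page_content`.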