最近一个项目需要用到按拼音索引的地区数据,在网上没有找到合适的数据,于是决定用 Python 写个爬虫脚本:先爬取美团的城市列表,然后再请求高德行政区域查询接口补充数据。
import json
import sys
import urllib.request

import requests
from bs4 import BeautifulSoup
# Look up a city's code and center coordinates by name through the AMap
# (Gaode) administrative-district query API.
def getLocation(name):
    """Return ``(city_code, longitude, latitude)`` for *name*, or ``None``.

    The first district in the API response is used. ``None`` is returned
    when the response contains no usable district: a missing or empty
    ``districts`` list, a ``count`` of zero, or a ``center`` value that is
    not a ``"<longitude>,<latitude>"`` string.

    Args:
        name: City name, sent as the ``keywords`` query parameter.

    Returns:
        A 3-tuple ``(city_code, longitude, latitude)``, where the two
        coordinates are the string halves of ``center``, or ``None``.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): the API key is hard-coded here; consider loading it from
    # configuration or an environment variable instead.
    params = {'key': '809e7cf228d47949c89155055f5ceba9', 'keywords': name}
    url = 'https://restapi.amap.com/v3/config/district'
    # Without a timeout a single stalled connection would block the whole
    # crawl indefinitely.
    res = requests.get(url, params, timeout=10)
    res_dict = json.loads(res.content)

    # A payload without a 'districts' key is treated like "no match" rather
    # than raising KeyError.
    districts = res_dict.get('districts') or []
    if not districts or int(res_dict.get('count', 0)) <= 0:
        return None

    first = districts[0]
    center = first.get('center')
    if not isinstance(center, str):
        return None

    longitude, sep, latitude = center.partition(",")
    # partition() always yields three items, so checking its length says
    # nothing; the separator tells us whether the value really had two parts.
    if not sep:
        return None
    return first.get('citycode'), longitude, latitude
# Crawl Meituan's city-selection page, group the cities by their initial
# letter, enrich each city with the data returned by getLocation(), and print
# the result as JSON on stdout.
#
# The ``with`` block closes the HTTP response as soon as the body is read,
# and the timeout keeps a stalled connection from hanging the script.
with urllib.request.urlopen("https://www.meituan.com/changecity/", timeout=30) as response:
    html = response.read().decode("utf-8")  # decode the response bytes as UTF-8
bf = BeautifulSoup(html, 'html.parser')  # parse the page
labels = bf.find_all('span', class_='city-label')  # initial-letter markers
city_list = []
for label in labels:
    letter = label.get_text()
    # The cities for a letter are looked up by the element id "city-<letter>".
    datas = bf.select('#city-' + letter)
    for item in datas:
        citys = []
        for city in item.find_all(class_='city'):
            # Pass the city's text, not the Tag object, to the lookup.
            name = city.get_text().strip()
            if not name:
                continue
            info = getLocation(name)
            if info is None:
                # Report on stderr so stdout stays a single valid JSON document.
                print('no location data for city: ' + name, file=sys.stderr)
                continue
            code, longitude, latitude = info
            citys.append({
                'name': name,
                'code': code,
                'Longitude': longitude,
                'Latitude': latitude,
            })
        city_list.append({'letter': letter, 'data': citys})
print(json.dumps(city_list))