# Model metadata collected from the search API. The three lists are filled in
# lockstep, so index i of each list describes the same model.
realName = []  # real names
userId = []    # user IDs
userurl = []   # profile page URLs

# Walk the paginated search endpoint, requesting 100 entries per page.
# `page` (the number of pages to fetch) must be bound before this loop runs.
for p in range(1, page + 1):
    # The page number travels in the `currentPage` parameter. It must be
    # introduced by a literal '&': the text '&curren' had been mangled into
    # the single character '¤', which glued the page number onto the
    # preceding `searchFansNum` parameter.
    page_url = (
        'http://mm.taobao.com/tstar/search/tstar_model.do'
        '?_input_charset=utf-8&q=&viewFlag=A&sortType=default'
        '&searchStyle=&searchRegion=city%3A&searchFansNum='
        '&currentPage=' + str(p) + '&pageSize=100'
    )
    # The response body is decoded as GBK before being parsed as JSON.
    response_text = requests.post(page_url).content.decode('gbk')
    models = json.loads(response_text)['data']['searchDOList']

    for model in models:
        model_id = model['userId']
        realName.append(model['realName'])
        userId.append(model_id)
        # Personal home page of the model.
        userurl.append(
            'https://mm.taobao.com/self/aiShow.htm?userId=' + str(model_id)
        )
def get_img(url):
    """Return the JPEG image URLs found on a Taobao MM profile page.

    The page at *url* is downloaded, decoded as GBK and searched for
    ``<img>`` tags inside ``<div class="mm-aixiu-content">``.

    Args:
        url: Address of the profile page to scan.

    Returns:
        A list of image URLs containing ``.jpg``, without duplicates, in the
        order in which they first appear on the page.
    """
    page_text = requests.get(url).content.decode('GBK')
    tree = etree.HTML(page_text)
    sources = tree.xpath('//div[@class="mm-aixiu-content"]//img/@src')

    img = []
    seen = set()
    for src in sources:
        # Only scheme-relative sources ("//host/path") need a scheme; adding
        # 'https:' to a source that already has one would corrupt it.
        jpg = 'https:' + src if src.startswith('//') else src
        # Literal substring test: the former pattern '(.*?).jpg' had an
        # unescaped dot and therefore accepted any character before "jpg".
        if '.jpg' not in jpg or jpg in seen:
            continue
        # Drop duplicate URLs while keeping first-seen order, so the numbered
        # files written by the caller are stable from run to run.
        seen.add(jpg)
        img.append(jpg)
    return img
# `urlretrieve` lives in `urllib` on Python 2 and in `urllib.request` on
# Python 3; resolve it once so the loop below runs on either interpreter.
try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

# Walk the list of profile URLs and fetch every user's pictures.
# NOTE(review): this loop expects `url` to be a list of profile URLs and
# `uname` to be a list of names aligned with it by index. `uname` is not
# assigned in the visible part of the script and `url` is only visibly bound
# to a single URL string -- confirm both are set up before this point.
for i, profile_url in enumerate(url):
    img = get_img(profile_url)

    # Each user gets a dedicated folder under D:/taobao/; create it when it
    # does not exist yet.
    path = 'D:/taobao/' + uname[i] + '/'
    if not os.path.exists(path):
        os.makedirs(path)

    # Save the pictures into that folder as 1.jpg, 2.jpg, ...
    for j, img_url in enumerate(img, 1):
        urlretrieve(img_url, path + str(j) + '.jpg')
        # Progress line: "<name> <current>/<total> <image url>". A single
        # parenthesised argument prints identically on Python 2 and 3.
        print('%s %d/%d %s' % (uname[i], j, len(img), img_url))