Python实现CSDN博客爬虫之3:抓取所有博文
def get_random_sleep():
    """Pause the caller for a random whole number of seconds.

    The delay is drawn with ``random.randint(0, 2)``, so it is 0, 1 or 2
    seconds (both bounds inclusive). Returns ``None``.
    """
    pause_seconds = random.randint(0, 2)
    time.sleep(pause_seconds)


# NOTE(review): the remainder of this excerpt (the beginning of
# get_page_count) is cut off by the page's "read more" link, so it is kept
# verbatim on the next line instead of being reconstructed.
#抓取一个用户有几页博客listdef get_page_count(username):print('开始抓取用户:' + username + '的博客list页数')code=requests.get(url_prefix + username ,headers=headers).status_codeif code == 200:have_blog = Trueelse:hava_blog = Falsereturn 0try:url = url_prefix + username + '/article/list'req = urllib.request.Request(url, headers=headers)data = urllib.request.urlopen(req).read()data = data.decode('utf-8')soup = BeautifulSoup(data)page_content = soup.find(class_ = 'p...阅读全文