Looking for fellow Jiangsu users

My first web crawler

Posted by wyj on October 2, 2018
from urllib import request
from urllib import error
import time

# Walk through team ids 1000..1499 and print the ids of teams
# whose page mentions "江苏" (Jiangsu) anywhere in the HTML.
for i in range(1000, 1500):
	try:
		f = request.urlopen("https://www.luogu.org/team/show?teamid=" + str(i))
	except error.HTTPError:
		# Team id does not exist (or access is denied); skip it.
		continue
	else:
		data = f.read()
		tmp = data.decode("utf-8")
		if tmp.find("江苏") != -1:
			print(i)
		#time.sleep(0.1)  # optional throttle to go easy on the server

UPD: it turns out this misses quite a few teams... so I had to fall back to printing every team's name and filtering by hand.

from urllib import request
from urllib import error
import time

# Second pass: instead of searching for "江苏", print every team's name
# so the list can be filtered manually.
for i in range(1000, 2000):
	s = "https://www.luogu.org/team/show?teamid=" + str(i)
	try:
		f = request.urlopen(s)
	except error.HTTPError:
		continue
	else:
		data = f.read()
		tmp = data.decode("utf-8")
		# The team name sits right after this <h1> tag in the page source.
		tag = '<h1 name="teamtitle" teamid="' + str(i) + '" my="">'
		t = tmp.find(tag)
		if t == -1:
			continue
		print("%d: " % i, end='')
		t += len(tag)
		# Print characters up to the end of the line, i.e. the team name.
		while tmp[t] != '\n':
			print(tmp[t], end='')
			t += 1
		print("")
		#time.sleep(0.1)
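
For reference, the same extraction can also be done with a regular expression instead of walking the string character by character. This is only a sketch built around the same <h1 name="teamtitle" ...> marker the script above relies on; the actual page markup may differ, and the URL and attribute layout here are simply taken over from the original code.

import re
from urllib import request
from urllib import error

# Minimal sketch (assumption: the team name follows the <h1 name="teamtitle" ...> tag
# on the same line, as implied by the marker string used above).
title_re = re.compile(r'<h1 name="teamtitle"[^>]*>\s*([^<\n]+)')

for i in range(1000, 2000):
	url = "https://www.luogu.org/team/show?teamid=" + str(i)
	try:
		html = request.urlopen(url).read().decode("utf-8")
	except error.HTTPError:
		continue
	m = title_re.search(html)
	if m:
		print("%d: %s" % (i, m.group(1).strip()))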