不重复访问网站,使用栈的程序

from bs4 import BeautifulSoup
import urllib.request

class Stack:
    def __init__(self):
        self.st=[]
    def pop(self):
        return self.st.pop()
    def push(self,obj):
        self.st.append(obj)
    def empty(self):
        return len(self.st)==0

def spider(url):
    global urls
    stack=Stack()
    stack.push(url)
    while not stack.empty():
        url=stack.pop()
        if url not in urls:
            _______________________
            try:
                data=urllib.request.urlopen(url)
                data=data.read()
                data=data.decode()
                soup=BeautifulSoup(data,"lxml")
                print(soup.find("h3").text)
                links=soup.select("a")
                for i in _______________________:
                    href=links[i]["href"]
                    url=start_url+"/"+href
                    stack.push(url)
            except Exception as err:
                print(err)

start_url="http://127.0.0.1:5000"
urls=[]
spider(start_url)
print("The End")
A.
urls.append(url); range(len(links),-1,-1)
B.
urls.append(url); range(len(links)-1,0,-1)
C.
urls.insert(url,0); range(len(links)-1,-1,-1)
D.
urls.append(url); range(len(links)-1,-1,-1)