现在2020年2月5号测试非阻塞IO setblocking(False)有问题, 请求百度无数据返回
来源:12-3 select+回调+事件循环获取html-1

慕侠2065837
2020-02-05
def get_sock_an_port(proto):
if proto == 'https':
sock = ssl.wrap_socket(socket.socket())
port = 443
elif proto == 'http':
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
port = 80
return sock, port
def get_url(url):
host = urlparse(url).netloc
path = urlparse(url).path
proto = urlparse(url).scheme
sock, port = get_sock_an_port(proto)
sock.setblocking(False)
try:
sock.connect((host, port))
except BlockingIOError:
pass
headers = "GET {} HTTP/1.1\r\nConnection: close\r\nhost: {}\r\n\r\n".format(
path, host)
while True:
try:
sock.send(headers.encode('utf8'))
break
except OSError:
pass
r = b''
while True:
try:
message = sock.recv(1024)
except BlockingIOError:
continue
if message:
r += message
else:
break
sock.close()
print(r.decode('utf8'))
if __name__ == "__main__":
get_url('https://movie.douban.com/')
返回数据:
HTTP/1.1 400 Bad Request
Date: Wed, 05 Feb 2020 05:52:44 GMT
Content-Type: text/html
Transfer-Encoding: chunked
Connection: close
Server: dae
X-Content-Type-Options: nosniff
f8
400 The plain HTTP request was sent to HTTPS port
The plain HTTP request was sent to HTTPS port
nginx
400 Bad Request
0
Bobby老师, 现在2020年2月5号测试非阻塞IO setblocking(False) 请求百度后无数据返回, 如果不设置setblocking(False)是有数据返回. 在设置setblocking(False)测试了https:\movie.douban.com/ , 返回400 The plain HTTP request was sent to HTTPS port, 但是用阻塞IO就有数据返回, 希望Bobby老师帮忙看下是什么原因导致?
写回答
1回答
-
bobby
2020-02-05
import socket from urllib.parse import urlparse #使用非阻塞io完成http请求 def get_url(url): #通过socket请求html url = urlparse(url) host = url.netloc path = url.path if path == "": path = "/" #建立socket连接 client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) client.setblocking(False) try: client.connect((host, 80)) #阻塞不会消耗cpu except BlockingIOError as e: pass #不停的询问连接是否建立好, 需要while循环不停的去检查状态 #做计算任务或者再次发起其他的连接请求 while True: try: client.send("GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host).encode("utf8")) break except OSError as e: pass data = b"" while True: try: d = client.recv(1024) except BlockingIOError as e: continue if d: data += d else: break data = data.decode("utf8") html_data = data.split("\r\n\r\n")[1] print(html_data) client.close() if __name__ == "__main__": get_url("http://www.baidu.com")
这是课程中讲解的源码 你可以测试一下 是能抓取到数据的,你这里抓取的并不是百度啊
00
相似问题