Python套接字Socket读取http网页web数据

使用 Python 的套接字(socket)模块,通过 HTTP 协议读取网页数据。下面以读取百度首页的内容为例,程序代码如下:

import socket


def getDataBySocket(url, timeout=10.0):
    """Fetch ``/`` from a host over plain HTTP (port 80) and print the response.

    Opens a TCP connection to ``(url, 80)``, sends a minimal ``GET /``
    request and prints every received chunk (status line, headers and body)
    decoded as UTF-8, until the server closes the connection.

    Args:
        url: Host name or IP address of the server. Despite the parameter
            name this is a bare host such as ``"www.example.com"``, not a
            full URL; it is also sent verbatim as the ``Host`` header.
        timeout: Seconds to wait on any single socket operation before
            giving up. ``None`` disables the timeout.

    Raises:
        OSError: If the connection cannot be established or the request
            cannot be sent. Errors while reading the response are reported
            on stdout instead of being raised.
    """
    import codecs

    # HTTP requires CRLF line endings and an empty line after the headers.
    # "Connection: close" asks the server to close the socket once the
    # response is complete; without it an HTTP/1.1 server keeps the
    # connection alive and the read loop below would never see end-of-stream.
    request_lines = [
        "GET / HTTP/1.1",
        "Host: " + url,
        "Connection: close",
        "",
        "",
    ]
    request = "\r\n".join(request_lines).encode("utf-8")

    # An incremental decoder keeps the trailing bytes of a multi-byte
    # character that was split across two recv() chunks instead of failing.
    # Undecodable bytes are replaced rather than aborting the read.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    size = 1024

    # The context manager guarantees the socket is closed on every path.
    with socket.socket() as sock:
        sock.settimeout(timeout)
        sock.connect((url, 80))

        print("建立连接的远程服务器地址:", sock.getpeername())

        # sendall() retries until the whole request has been written;
        # send() may transmit only part of the buffer.
        sock.sendall(request)

        try:
            while True:
                data = sock.recv(size)
                count = len(data)
                print("读取数据数量:", count)

                # recv() returning no bytes means the peer closed the stream.
                if count == 0:
                    tail = decoder.decode(b"", final=True)
                    if tail:
                        print(tail)
                    print("读数据完毕")
                    break

                print(decoder.decode(data))
        except OSError as exc:
            # Covers timeouts and connection resets; KeyboardInterrupt and
            # SystemExit are deliberately left to propagate.
            print("发生异常", exc)


if __name__ == '__main__':
    # Host of the Baidu home page. The bare "www.baidu" is not a resolvable
    # host name, so the connection attempt would fail before any request.
    url = "www.baidu.com"
    getDataBySocket(url)

 

更多推荐

Python套接字Socket读取http网页web数据