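# 扫描表格中域名的 title 并写表 (Scan the page <title> of every domain listed in the spreadsheet and write the results to a sheet.)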

import requests
#import BeautifulSoup
from readability import Document
import xlrd
#import xlwr
# Open the source workbook. A raw string keeps the backslash literal: "\d" in
# a normal string is an invalid escape sequence that newer Python versions
# warn about.
workbook = xlrd.open_workbook(r"e:\domain.xls")
table = workbook.sheet_by_name('Sheet1')

# Sample value from row index 1, column index 1. Guarded so that a sheet with
# fewer than two rows or columns no longer raises IndexError here; the name is
# kept so existing references to it still resolve.
domain_name = (
    table.cell(1, 1).value if table.nrows > 1 and table.ncols > 1 else None
)

# Accumulates the values read from column index 1 of the sheet.
domain_namelist = []

def write_xls(domain_name,title):
    """Unfinished stub; presumably meant to write a domain/title pair to a workbook.

    NOTE(review): this function is incomplete and is never called in the
    visible code. Neither ``domain_name`` nor ``title`` is used, nothing is
    written or saved, and nothing is returned.
    """
    # NOTE(review): `xlwt` is not imported in the visible part of the file
    # (the only related line is the commented-out `#import xlwr`), so calling
    # this function would raise NameError here -- confirm and add the import.
    workbook=xlwt.Workbook()
    # Bare name with no assignment: no `worksheet` is defined in the visible
    # code, so evaluating this expression would raise NameError.
    worksheet


# Request headers imitating a desktop Chrome browser. They are identical for
# every request, so they are built once instead of on each loop iteration.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    # Only advertise encodings that requests can always decode. "br" is only
    # decoded when an optional brotli package is installed and "sdch" is not
    # supported at all; advertising them can yield an undecodable body.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

# Collect the value of column index 1 from every row of the sheet.
domain_namelist.extend(table.cell(row, 1).value for row in range(table.nrows))

# Fetch each domain over plain HTTP and print its page title.
for domain in domain_namelist:
    # Empty cells come back as '' and numeric cells as floats; neither is a
    # usable host name, and a non-string would crash the concatenation below.
    if not isinstance(domain, str) or not domain.strip():
        continue
    domain = domain.strip()
    url = "http://" + domain
    try:
        response = requests.get(url, headers=headers, timeout=5)
        # Must be *called*: the bare attribute reference used previously was a
        # no-op, so HTTP 4xx/5xx responses were silently treated as success.
        response.raise_for_status()
    except requests.RequestException:
        print("%s:无法访问" % domain)
    else:
        # When the server declares no charset, requests falls back to
        # ISO-8859-1 for text responses, which garbles non-Latin titles;
        # use the encoding detected from the body instead.
        if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
            response.encoding = response.apparent_encoding
        print(domain, Document(response.text).title())

# (Web-page residue from the original blog post; not part of the script.)
# Add a Comment
# 电子邮件地址不会被公开。 必填项已用*标注