add encoding detection

This commit is contained in:
xw_y_am@rmbp 2018-04-29 22:24:10 +08:00
parent 21901663bc
commit adf99e445b
2 changed files with 11 additions and 6 deletions

View File

@ -1,3 +1,4 @@
import chardet
import requests
from bs4 import BeautifulSoup
@ -21,7 +22,8 @@ class Sites:
return
try:
soup = self.fetch_soup(url)
except:
except Exception as e:
print('\n' + str(e))
break
for item in self.get_item(soup):
if self.to_stop:
@ -42,7 +44,10 @@ class Sites:
@staticmethod
def fetch_soup(url):
print('fetching "' + url + '" ...', end=' ')
soup = BeautifulSoup(requests.get(url).text, 'lxml')
req = requests.get(url)
det = chardet.detect(bytes(req.text, req.encoding))
req.encoding = det['encoding']
soup = BeautifulSoup(req.text, 'lxml')
print('ok!')
return soup
@ -313,4 +318,4 @@ class Ciliba(Sites):
return soup.find('a', 'download').get('href')
lst = (Bobobt, BTcerise, Cililianc, BTdao, BTrabbit, BTanw, Ciliba)
lst = (Bobobt, BTcerise, BTdao, BTrabbit, BTanw, Ciliba)