add encoding detection

This commit is contained in:
xw_y_am@rmbp 2018-04-29 22:24:10 +08:00
parent 21901663bc
commit adf99e445b
2 changed files with 11 additions and 6 deletions

View File

@ -1,3 +1,4 @@
import chardet
import requests
from bs4 import BeautifulSoup
@ -9,7 +10,7 @@ class Sites:
def __del__(self):
    """Print this object's ``name`` followed by ``'stop !'`` on finalization."""
    owner = self.name
    print(owner, 'stop !')
def stop(self):
    """Raise the ``to_stop`` flag on this object.

    Code elsewhere in the class tests ``self.to_stop`` to decide whether to
    keep going; this method only sets the flag and returns ``None``.
    """
    requested = True
    self.to_stop = requested
@ -21,7 +22,8 @@ class Sites:
return
try:
soup = self.fetch_soup(url)
except:
except Exception as e:
print('\n' + str(e))
break
for item in self.get_item(soup):
if self.to_stop:
@ -42,7 +44,10 @@ class Sites:
@staticmethod
def fetch_soup(url):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    The page is fetched once. Its character set is guessed by running
    chardet over the raw response bytes, and that guess replaces the
    encoding requests would otherwise use to decode the body.

    Progress is reported on stdout: a ``fetching "<url>" ...`` prefix
    before the request and ``ok!`` once parsing has finished.

    Any exception raised by requests or the parser propagates to the caller.
    """
    print('fetching "' + url + '" ...', end=' ')
    req = requests.get(url)
    # Sniff the undecoded payload. Re-encoding ``req.text`` with
    # ``req.encoding`` fails when the latter is None and, when it is wrong,
    # hands chardet bytes that no longer match what the server sent.
    det = chardet.detect(req.content)
    # chardet reports None when it has no guess; in that case keep whatever
    # encoding requests already settled on.
    if det['encoding']:
        req.encoding = det['encoding']
    soup = BeautifulSoup(req.text, 'lxml')
    print('ok!')
    return soup
@ -254,7 +259,7 @@ class BTanw(Sites):
@staticmethod
def last_page(soup):
return len(soup.find('div', 'bottom-pager').find_all('a')[-1].get('href')) == 0
@staticmethod
def get_item(soup):
for dom_item in soup.find_all('div', 'search-item'):
@ -313,4 +318,4 @@ class Ciliba(Sites):
return soup.find('a', 'download').get('href')
# Tuple of site classes published by this module. Only one assignment is
# kept: an earlier tuple that was immediately overwritten had no effect.
lst = (Bobobt, BTcerise, BTdao, BTrabbit, BTanw, Ciliba)

2
ux.py
View File

@ -89,4 +89,4 @@ class SearchThread(QThread):
self.to_stop_search.emit()
def stop(self):
    """Forward a stop request to the wrapped ``search`` object.

    ``self.search.stop()`` is called exactly once; the return value of that
    call is discarded and this method returns ``None``.
    """
    self.search.stop()