Add encoding detection
This commit is contained in:
parent
21901663bc
commit
adf99e445b
15
sites.py
15
sites.py
@ -1,3 +1,4 @@
|
||||
import chardet
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@ -9,7 +10,7 @@ class Sites:
|
||||
|
||||
def __del__(self):
|
||||
print(self.name, 'stop !')
|
||||
|
||||
|
||||
def stop(self):
|
||||
self.to_stop = True
|
||||
|
||||
@ -21,7 +22,8 @@ class Sites:
|
||||
return
|
||||
try:
|
||||
soup = self.fetch_soup(url)
|
||||
except:
|
||||
except Exception as e:
|
||||
print('\n' + str(e))
|
||||
break
|
||||
for item in self.get_item(soup):
|
||||
if self.to_stop:
|
||||
@ -42,7 +44,10 @@ class Sites:
|
||||
@staticmethod
def fetch_soup(url):
    """Download *url* and parse the page into a BeautifulSoup tree.

    The character encoding is sniffed from the raw response bytes with
    chardet and applied to the response before its text is decoded.

    :param url: address of the page to fetch.
    :return: the page parsed with the 'lxml' parser.
    """
    print('fetching "' + url + '" ...', end=' ')
    req = requests.get(url)
    # Detect on the undecoded payload. Re-encoding req.text with req.encoding
    # raises TypeError when the response carries no usable charset (encoding
    # is None) and cannot recover bytes lost to a wrong first decode.
    det = chardet.detect(req.content)
    # If chardet returns None here, requests falls back to its own guess
    # when .text is read.
    req.encoding = det['encoding']
    soup = BeautifulSoup(req.text, 'lxml')
    print('ok!')
    return soup
|
||||
|
||||
@ -254,7 +259,7 @@ class BTanw(Sites):
|
||||
@staticmethod
|
||||
def last_page(soup):
|
||||
return len(soup.find('div', 'bottom-pager').find_all('a')[-1].get('href')) == 0
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_item(soup):
|
||||
for dom_item in soup.find_all('div', 'search-item'):
|
||||
@ -313,4 +318,4 @@ class Ciliba(Sites):
|
||||
return soup.find('a', 'download').get('href')
|
||||
|
||||
|
||||
lst = (Bobobt, BTcerise, Cililianc, BTdao, BTrabbit, BTanw, Ciliba)
|
||||
lst = (Bobobt, BTcerise, BTdao, BTrabbit, BTanw, Ciliba)
|
||||
|
Loading…
x
Reference in New Issue
Block a user