add encoding detection
This commit is contained in:
parent
21901663bc
commit
adf99e445b
11
sites.py
11
sites.py
@ -1,3 +1,4 @@
|
|||||||
|
import chardet
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
@ -21,7 +22,8 @@ class Sites:
|
|||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
soup = self.fetch_soup(url)
|
soup = self.fetch_soup(url)
|
||||||
except:
|
except Exception as e:
|
||||||
|
print('\n' + str(e))
|
||||||
break
|
break
|
||||||
for item in self.get_item(soup):
|
for item in self.get_item(soup):
|
||||||
if self.to_stop:
|
if self.to_stop:
|
||||||
@ -42,7 +44,10 @@ class Sites:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def fetch_soup(url):
|
def fetch_soup(url):
|
||||||
print('fetching "' + url + '" ...', end=' ')
|
print('fetching "' + url + '" ...', end=' ')
|
||||||
soup = BeautifulSoup(requests.get(url).text, 'lxml')
|
req = requests.get(url)
|
||||||
|
det = chardet.detect(bytes(req.text, req.encoding))
|
||||||
|
req.encoding = det['encoding']
|
||||||
|
soup = BeautifulSoup(req.text, 'lxml')
|
||||||
print('ok!')
|
print('ok!')
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
@ -313,4 +318,4 @@ class Ciliba(Sites):
|
|||||||
return soup.find('a', 'download').get('href')
|
return soup.find('a', 'download').get('href')
|
||||||
|
|
||||||
|
|
||||||
lst = (Bobobt, BTcerise, Cililianc, BTdao, BTrabbit, BTanw, Ciliba)
|
lst = (Bobobt, BTcerise, BTdao, BTrabbit, BTanw, Ciliba)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user