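"""Spiders for several BT/magnet search sites.

lst() returns every class below whose ``site_enable`` flag is True; each one
scrapes its site page by page and emits result dicts through a Qt-style
signal. See the usage sketch at the bottom of the file.
"""
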
import chardet
import requests
from bs4 import BeautifulSoup


def lst():
    # Collect every site class in this module that is switched on.
    def get():
        for _, value in globals().items():
            try:
                if value.site_enable:
                    yield value
            except AttributeError:
                # Plain globals (modules, functions) have no site_enable.
                continue

    return tuple(get())


class Sites:

    def __init__(self, signal):
        # `signal` is a Qt-style signal; every scraped result dict is
        # delivered to the caller through signal.emit().
        self.send_item = signal
        print(self.name, 'start ...')

    def __del__(self):
        print(self.name, 'stop !')

    def stop(self):
        self.to_stop = True

    def search(self, key_word):
        self.to_stop = False
        key_word = self.trns_key(key_word)
        for url in self.gen_url(key_word):
            if self.to_stop:
                return
            try:
                soup = self.fetch_soup(url)
            except Exception as e:
                print('\n' + str(e))
                break
            for item in self.get_item(soup):
                if self.to_stop:
                    return
                if 'link_url' in item:
                    # Two-step sites: fetch the detail page to get the
                    # actual download link.
                    try:
                        item['link'] = self.get_link(self.fetch_soup(item['link_url']))
                    except Exception:
                        item['link'] = ''
                if item['link']:
                    self.send_item.emit(item)
            try:
                if self.last_page(soup):
                    break
            except Exception:
                # No recognizable pager -- treat it as the last page.
                break

    @staticmethod
    def fetch_soup(url):
        print('fetching "' + url + '" ...', end=' ')
        req = requests.get(url, timeout=10)
        # Servers here often report a wrong or missing charset, so detect
        # the encoding from the raw bytes instead.
        req.encoding = chardet.detect(req.content)['encoding']
        soup = BeautifulSoup(req.text, 'lxml')
        print('ok!')
        return soup

    # Hooks each site subclass must provide.

    @staticmethod
    def trns_key(key_word):
        raise NotImplementedError

    @staticmethod
    def gen_url(key_word):
        raise NotImplementedError

    @staticmethod
    def last_page(soup):
        raise NotImplementedError

    @staticmethod
    def get_item(soup):
        raise NotImplementedError

    @staticmethod
    def get_link(soup):
        raise NotImplementedError
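

# A minimal subclass template (hypothetical: the site name, URL pattern,
# and selectors below are placeholders, not a real tracker):
#
#     class Example(Sites):
#         name = 'Example'
#         site_enable = False  # flip to True once the selectors are real
#
#         @staticmethod
#         def trns_key(key_word):
#             return key_word.replace(' ', '%20')
#
#         @staticmethod
#         def gen_url(key_word):
#             page_number = 0
#             while True:
#                 page_number += 1
#                 yield 'http://example.invalid/search/%s/%d' % (key_word, page_number)
#
#         @staticmethod
#         def last_page(soup):
#             return soup.find('div', 'pager') is None
#
#         @staticmethod
#         def get_item(soup):
#             for dom_item in soup.find_all('div', 'result'):
#                 yield {
#                     'hot': '-',
#                     'size': '-',
#                     'name': ''.join(dom_item.find('a').strings),
#                     'link': dom_item.find('a').get('href')
#                 }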


class Bobobt(Sites):
    name = 'Bobobt'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%20')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'https://www.bobobt.com/search/%s/%d/0/0.html' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        # On the last page the final pager link is no longer the '>>' arrow.
        return soup.find('div', 'pager').find_all('a')[-1].string not in ['>>']

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('div', 'ss'):
            try:
                la = dom_item.find_all('a')
                lb = dom_item.find_all('b')
                yield {
                    'hot': lb[4].string,
                    'size': lb[1].string,
                    'name': ''.join(la[0].strings).strip(),
                    'link': la[1].get('href')
                }
            except Exception:
                continue


class BTcerise(Sites):
    name = 'BTcerise'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%20')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'http://www.btcerise.me/search?keyword=%s&p=%d' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        # The last pager item is marked "disabled" on the final page; `in`
        # on a Tag tests its children, so check the rendered markup instead.
        return 'disable' in str(soup.find('ul', 'pagination').find_all('li')[-1])

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('div', 'r'):
            try:
                yield {
                    'hot': '-',
                    'size': dom_item.find_all('span', 'prop_val')[1].string,
                    'name': ''.join(dom_item.find('h5').strings),
                    'link': dom_item.find('div').find('a').get('href')
                }
            except Exception:
                continue


class Cililianc(Sites):
    name = 'Cililianc'
    site_enable = False

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%2B')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'http://cililianc.com/list/%s/%d.html' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        # '下一页' is the site's "next page" label; when the final pager
        # link is something else, this is the last page.
        return soup.find('div', 'pg').find_all('a')[-1].string not in ['下一页']

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find('ul', 'mlist').find_all('li'):
            try:
                yield {
                    'hot': '-',
                    'size': dom_item.find('dt').find('span').string,
                    'name': ''.join(dom_item.find('a').strings),
                    'link': dom_item.find('div', 'dInfo').find('a').get('href')
                }
            except Exception:
                continue


class BTdao(Sites):
    name = 'BTdao'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%2B')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'http://www.btdao.me/list/%s-s2d-%d.html' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        # '下一页' is the site's "next page" label.
        return soup.find('div', 'pg').find_all('a')[-1].string not in ['下一页']

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('li'):
            try:
                la = dom_item.find('a')
                ls = dom_item.find('dl').find_all('span')
                yield {
                    'hot': ls[3].string,
                    'size': ls[0].string,
                    'name': la.get('title'),
                    'link_url': 'http://www.btdao.me' + la.get('href')
                }
            except Exception:
                continue

    @staticmethod
    def get_link(soup):
        return soup.find('dl', 'BotInfo').find('a').get('href')


class BTrabbit(Sites):
    name = 'BTrabbit'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%20')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'http://www.btrabbit.net/search/%s/default-%d.html' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        return soup.find('div', 'bottom-pager').find_all('a')[-1].string not in ['>']

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('div', 'search-item'):
            try:
                la = dom_item.find('a')
                lb = dom_item.find('div', 'item-bar').find_all('b')
                yield {
                    'hot': lb[2].string,
                    # Normalize the non-breaking spaces the site puts in sizes.
                    'size': lb[1].string.replace('\xa0', ' '),
                    'name': la.get('title'),
                    'link_url': 'http://www.btrabbit.net' + la.get('href')
                }
            except Exception:
                continue

    @staticmethod
    def get_link(soup):
        # The download link is the content of the first <textarea>.
        return soup.find('textarea').string


class BTanw(Sites):
    name = 'BTanw'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '%20')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'http://www.btanw.com/search/%s-hot-desc-%d' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        # The "next" pager link loses its href on the last page.
        return not soup.find('div', 'bottom-pager').find_all('a')[-1].get('href')

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('div', 'search-item'):
            try:
                la = dom_item.find('a')
                lb = dom_item.find('div', 'item-bar').find_all('b')
                yield {
                    'hot': lb[1].string,
                    'size': lb[3].string,
                    'name': ''.join(la.strings),
                    'link_url': 'http://www.btanw.com' + la.get('href')
                }
            except Exception:
                continue

    @staticmethod
    def get_link(soup):
        return soup.find('div', 'fileDetail').find_all('p')[5].find('a').get('href')


class Ciliba(Sites):
    name = 'Ciliba'
    site_enable = True

    @staticmethod
    def trns_key(key_word):
        return key_word.replace(' ', '+')

    @staticmethod
    def gen_url(key_word):
        page_number = 0
        while True:
            page_number += 1
            yield 'https://www.ciliba.org/s/%s_rel_%d.html' % (key_word, page_number)

    @staticmethod
    def last_page(soup):
        return soup.find('ul', 'pagination').find_all('a')[-1].string not in ['Last']

    @staticmethod
    def get_item(soup):
        for dom_item in soup.find_all('div', 'search-item'):
            try:
                la = dom_item.find('h3').find('a')
                lb = dom_item.find('div', 'item-bar').find_all('b')
                yield {
                    'hot': lb[2].string,
                    'size': lb[1].string,
                    'name': ''.join(la.strings),
                    'link_url': la.get('href')
                }
            except Exception:
                continue

    @staticmethod
    def get_link(soup):
        return soup.find('a', 'download').get('href')
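

if __name__ == '__main__':
    # Minimal usage sketch. The real caller passes a Qt signal; the dummy
    # class below only assumes the spider needs an object with .emit(item).
    # Note that search() performs live HTTP requests against the site.
    class _PrintSignal:
        @staticmethod
        def emit(item):
            print(item['size'], item['hot'], item['name'], item['link'])

    print('enabled sites:', [cls.name for cls in lst()])
    Bobobt(_PrintSignal()).search('ubuntu')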