import os
import re
import urllib.parse
import requests
from lxml import html
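
# This script bulk-downloads the .torrent files that specific uploaders have
# posted on worldwidetorrents.me: it walks each user's torrents-user.php
# listing, page by page, and saves every torrent not already present on disk.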

# Uploader IDs to scrape; '#' comments after each ID are ignored by the parser
# in the __main__ block below.
ID_STRING = '''
36     # Nemesis43
4445   # Shipjolly
20217  # RubrumPopulus
'''

def check_torrent(href):
    """Match download links like:
    download.php?id=445&name=Back%20to%20the%20Future%20011%20%282016%29%20%28Digital%29%20%28Kileko-Empire%29.cbr.torrent"""
    download_pattern = re.compile(r'^download\.php\?id=\d{1,20}&name=(.+)')
    return download_pattern.search(href)


def get_filename(href):
    """Extract and URL-decode the file name from a download link like:
    download.php?id=445&name=Back%20to%20the%20Future%20011%20%282016%29%20%28Digital%29%20%28Kileko-Empire%29.cbr.torrent"""
    download_pattern = re.compile(r'^download\.php\?id=\d{1,20}&name=(.+)')
    download_match = download_pattern.match(href)
    filename = ''
    if download_match:
        filename = download_match.group(1)
        filename = urllib.parse.unquote(filename)
    return filename


def check_page(href):
    """Match pagination links like: torrents-user.php?id=36&page=239"""
    page_pattern = re.compile(r'^torrents-user\.php\?id=\d{1,20}&page=\d{1,20}')
    return page_pattern.search(href)


def get_page_num(href):
    """Extract the page number from a pagination link like:
    torrents-user.php?id=36&page=239"""
    p_num = re.compile(r'^torrents-user\.php\?id=\d{1,20}&page=(\d{1,20})')
    m_num = p_num.match(href)
    if m_num:  # the captured group is all digits by construction
        return int(m_num.group(1))
    return 0
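
# A quick sanity check of the helpers above (illustrative values only):
#   get_filename('download.php?id=445&name=Foo%20Bar.cbr.torrent')
#       -> 'Foo Bar.cbr.torrent'
#   get_page_num('torrents-user.php?id=36&page=239') -> 239
#   get_page_num('not-a-page-link') -> 0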

def download_f_url(download_url, file_path):
    print(download_url)
    response = requests.get(url=download_url, headers=header, cookies=cookie)
    response.raise_for_status()  # avoid leaving an empty file behind on a failed request
    with open(file_path, "wb") as file:
        file.write(response.content)


def get_html(url):
    header['Referer'] = url
    page = requests.get(url=url, headers=header, cookies=cookie)
    return page.text

def get_hrefs(user_id, page_num):
    url = 'https://worldwidetorrents.me/torrents-user.php'
    param = {
        'id': user_id,
        'page': page_num,
    }
    # The browser sends e.g. referer: https://worldwidetorrents.me/account-details.php?id=4445
    referer = url + '?id=' + str(user_id)
    header['Referer'] = referer
    page = requests.get(url=url, headers=header, cookies=cookie, params=param)
    tree = html.fromstring(page.text)
    # ==================== collect the link list ====================
    hrefs = tree.xpath('//center/table//a[@href]/@href')
    return hrefs
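
# Note: the XPath above returns every href in the results table, which mixes
# pagination links ('torrents-user.php?id=...&page=...') with download links
# ('download.php?id=...&name=...'); check_page/check_torrent tell them apart.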

def ReadHtml(user_id, set_page=999):
    sub_dir = os.path.join(file_dir, str(user_id))
    os.makedirs(sub_dir, exist_ok=True)  # make sure the per-user folder exists
    # ================ read page 0 to discover the page count ================
    # e.g. https://worldwidetorrents.me/torrents-user.php?id=36&page=237
    page_num = 0
    hrefs = get_hrefs(user_id, page_num)
    page_hrefs = [href for href in hrefs if check_page(href)]
    page_nums = [get_page_num(href) for href in page_hrefs]
    if not page_nums:
        max_page = 1
    else:
        max_page = max(page_nums)
    if max_page == 0:
        max_page = 1
    print('max_page:', max_page)
    torrents = []  # running list of all torrent links seen
    wwt_prefix = 'https://worldwidetorrents.me/'
    end_page = min(max_page, set_page)
    print('end_page:', end_page)
    # ================ read every page ================
    for page_num in range(0, end_page + 1):
        hrefs = get_hrefs(user_id, page_num)
        torrent_hrefs = [href for href in hrefs if check_torrent(href)]
        print(page_num)
        torrents.extend(torrent_hrefs)
        for torrent in torrent_hrefs:
            download_url = wwt_prefix + torrent
            filename = get_filename(torrent)
            if not filename.lower().endswith('.torrent'):
                filename += '.torrent'
            print(filename)
            file_path = os.path.join(sub_dir, filename)
            if not os.path.exists(file_path):  # skip files that already exist
                try:
                    download_f_url(download_url, file_path)
                except Exception:
                    print('Attention, download failed:', download_url)
            else:
                # print(filename + ' already downloaded')
                pass
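
# Example call (illustrative): fetch at most the first three listing pages of
# a single uploader; pages are numbered from 0, so set_page=2 covers pages
# 0..2 inclusive.
#   ReadHtml(36, set_page=2)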

if __name__ == '__main__':
    # Open https://worldwidetorrents.me/, then copy your cookie and User-Agent
    # from the browser's dev tools and paste them below; they expire after a
    # while and have to be filled in again.
    cookie = {'key': 'value'}  # requests expects a dict of cookies, not a raw string
    header = {'User-Agent': 'your_user_agent'}
    file_dir = 'your_destination_folder'  # directory to save the .torrent files into
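    # A minimal sketch (assumption: 'raw' holds the Cookie header value as
    # copied from the browser, e.g. 'uid=123; pass=abc') for turning that raw
    # string into the dict that requests expects:
    #   raw = 'uid=123; pass=abc'
    #   cookie = dict(p.split('=', 1) for p in raw.split('; '))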
    id_lines = [line.split('#')[0].strip() for line in ID_STRING.splitlines()]
    search_list = sorted(set(int(line) for line in id_lines if line.isnumeric()))
    for user_id in search_list:
        try:
            ReadHtml(user_id, set_page=10)  # set_page = how many listing pages to fetch
        except Exception:
            pass