Better handling of nested and bracketed paths in directory-listing links

This commit is contained in:
2026-01-14 08:10:04 +00:00
parent c90a998a5d
commit e4007cdbc1

19
main.py
View File

@@ -5,11 +5,11 @@ import aiohttp
from aiohttp import ClientError from aiohttp import ClientError
# Directory link: an anchor whose href is one or more "segment/" parts and
# which is followed by a <code> element. The capture group is repeated, so
# findall() reports only the LAST path segment (e.g. "b/" for href="/a/b/").
folder_hyperlink_pat: re.Pattern = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\]-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
)

# Movie link: optional leading directory segments, then a file name ending in
# a known video extension. findall() yields 3-tuples of
# (last directory segment or "", file name, extension).
# The file-name class accepts "[" and "]" just like the directory class, and
# the dot before the extension is mandatory so names such as "camp4" are not
# mistaken for video files.
movie_hyperlink_pat: re.Pattern = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\]-]+\/)*([a-zA-Z0-9_. \[\]-]+\.(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>",
    re.IGNORECASE,
)
@@ -37,14 +37,19 @@ def join_url(a: str, b: str) -> str:
async def traverse(pool: list[str], url: str, verbose: bool = False) -> None:
    """Recursively collect movie URLs found under *url* into *pool*.

    The page at *url* is fetched, every folder link on it is descended into
    (depth-first, one at a time), and every movie link on it is appended to
    *pool* as a URL built with ``join_url``.

    Args:
        pool: List that receives the discovered movie URLs; mutated in place.
        url: Address of the listing page to scan.
        verbose: When true, print each folder entered and each file found.
    """
    # NOTE(review): fetch() is defined elsewhere; presumably it returns the
    # page body as text -- confirm.
    page = await fetch(url)
    folder_matches = folder_hyperlink_pat.findall(page)
    movie_matches = movie_hyperlink_pat.findall(page)

    for match in folder_matches:
        # findall() returns tuples when a pattern has several groups. Reduce
        # to the last group BEFORE the substring test below, which would raise
        # TypeError if handed a tuple.
        folder = match[-1] if isinstance(match, tuple) else match
        # Skip links whose segment already occurs in the current URL so the
        # walk does not climb back into a folder it is already inside.
        if folder in url:
            continue
        folder_url = join_url(url, folder)
        if verbose:
            print(f" -> {folder_url}")
        await traverse(pool, folder_url, verbose=verbose)

    for match in movie_matches:
        # Movie matches are tuples ending in (..., file name, extension), so
        # the file name is always the second-to-last item.
        file_url = join_url(url, match[-2])
        if verbose:
            print(f"{file_url} ({len(pool)})")
        pool.append(file_url)
@@ -78,11 +83,11 @@ async def main():
) )
if not is_fancy: if not is_fancy:
folder_hyperlink_pat = re.compile( folder_hyperlink_pat = re.compile(
r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>", re.IGNORECASE r"\<a href=\"(\/?[a-zA-Z0-9_ \[\]-]+\/)+\"\>", re.IGNORECASE
) )
movie_hyperlink_pat = re.compile( movie_hyperlink_pat = re.compile(
r"\<a href=\"\/?([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>", r"\<a href=\"(\/?[a-zA-Z0-9_ \[\]-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
re.IGNORECASE, re.IGNORECASE,
) )
tasks.append(traverse(movies, url, is_verbose)) tasks.append(traverse(movies, url, is_verbose))