Compare commits

23005f1050...master (20 commits)

- 2ea277deff
- 1ed1cb9194
- 0ae1356f2d
- f5c6f7fe5d
- 173397b0a3
- 074a8e904e
- d6d1831198
- b1813da978
- 6ce1c4437b
- 6403265443
- 33d78a7a33
- fba8464dae
- fe87dd1451
- e4007cdbc1
- c90a998a5d
- f7fa3d9788
- efa2e7249a
- 56273f1836
- dd33fc5743
- d23c1740db
README.md (15 lines changed)

```diff
@@ -1,21 +1,26 @@
-# movies - مجموعه آٰرشیو های مرتب شده فیلم
+# movies - مجموعه آرشیو های مرتب شده فیلم
 
 بهتر است برای جستجو در آرشیو ها از قابلیت `find in page` مرورگر خود استفاده کنید.
 
 ## آرشیو های موجود فعلی:
 
-### Donyaye Serial
+### 🍿 Donyaye Serial
 
 - *Dubbed* and *SoftSob*
-- Storages: [dls2.iran-gamecenter-host.com](https://dls2.iran-gamecenter-host.com/DonyayeSerial/), [dls.iran-gamecenter-host.com](https://dls.iran-gamecenter-host.com/DonyayeSerial/)
+- Links:
+- Additional Links:
   - **All links**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/donyaye_serial_all_archive.html)
   - **Top 1000 Series**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_1000_series.html)
   - **Top 5000 Movies**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_5000_movies.html)
 
+### 💎 Almas Movie
+
+- Links:
+  - **Series**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/series.md)
+  - **Movies**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/movies.md)
+  - **All Links**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/all.md)
+
 ## استخراج کننده لینک
 
-لینک های آرشیو های بالا با استفاده از یک اسکریپت جستجو و لینک های آن ها استخراج شده است. در صورت تمایل میتوانید از اسکریپت مذکور روی هر آرشیو مشابهی استفاده کنید
+لینک های آرشیو های بالا با استفاده از یک اسکریپت جستجو و لینک های آن ها استخراج شده است. در صورت تمایل میتوانید از اسکریپت مذکور روی هر آرشیو مشابهی استفاده کنید.
 
 ### آموزش نصب پیش نیاز ها و استفاده از اسکریپت:
```

In English: the commit fixes a stray diacritic in the Persian title ("movies: a collection of sorted movie archives"), adds a 🍿 emoji to the Donyaye Serial heading, replaces the "Storages" domain list with "Links" and "Additional Links" items, adds a new "💎 Almas Movie" section pointing at the generated view pages, and adds the missing full stop to the extractor paragraph ("The links of the archives above were crawled and extracted with a script; if you wish, you can use that script on any similar archive."). The unchanged Persian context lines read: "It is better to use your browser's `find in page` feature to search the archives.", "Currently available archives:", "Link extractor", and "How to install the prerequisites and use the script:".
almasmovie_indexer.py (new file, 115 lines)

```python
from glob import glob
from pathlib import Path
from urllib.parse import urlparse

# Buckets for the generated markdown pages.
series_links_dict = {}  # first path letter -> list of series URLs
movies_links_dict = {}  # folder name -> (name, url, year, folder_name)
all_links = []          # (name, markdown line) pairs for the combined page

folder = Path("view/almasmovie")


def process_series(link: str):
    # Keep one deduplicated URL per show, bucketed by the second path
    # segment (the letter directory in /<section>/<letter>/<show>/... paths).
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    first_letter = splitted_path[2]

    first_letter_list = series_links_dict.get(first_letter)
    if not first_letter_list:
        series_links_dict[first_letter] = list()

    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"

    if url not in series_links_dict[first_letter]:
        series_links_dict[first_letter].append(url)


def process_movies(link: str):
    # Movie paths end in .../<year>/<folder>/<file>; record each folder once.
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")

    folder_name = splitted_path[-2]

    if not movies_links_dict.get(folder_name):
        year = splitted_path[-3]
        name = splitted_path[-1].replace("_", " ").replace(".", " ")
        joined_path = "/".join(splitted_path[: len(splitted_path) - 1])
        movies_links_dict[folder_name] = (
            name,
            f"{parsed.scheme}://{parsed.hostname}{joined_path}",
            year,
            folder_name,
        )


# Iterating series
for file in glob("data/*.saymyname.website-series.txt"):
    with open(file, "r") as fp:
        links = fp.readlines()

    for link in links:
        process_series(link.strip())  # strip the newline readlines() keeps

for file in glob("data/*.saymyname.website-movies.txt"):
    with open(file, "r") as fp:
        links = fp.readlines()

    for link in links:
        process_movies(link.strip())


# Header strings are Persian: "Almas Movie archive (series only)",
# "Sorted alphabetically.", "Use your browser's `find in page` to search."
series_text = "# آرشیو الماسمووی (فقط سریال) \n\n\n"
series_text += "مرتب شده بر اساس حروف الفبا.\n\n"
series_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
series_text += "---\n"
for k, v in series_links_dict.items():
    for link in v:
        parsed_link = urlparse(link)
        name = parsed_link.path.split("/")[-1]
        line = f"- **{k}**: [{name}]({link.replace(' ', '%20')})\n\n"
        all_links.append((name, line))
        series_text += line

with open(folder / "series.md", "w") as fp:
    fp.write(series_text)

# "Almas Movie archive (movies only)", same boilerplate as above.
movies_text = "# آرشیو الماسمووی (فقط فیلم) \n\n\n"
movies_text += "مرتب شده بر اساس حروف الفبا.\n\n"
movies_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
movies_text += "---\n"

for v in sorted(movies_links_dict.values(), key=lambda x: x[0]):
    name = v[0]
    first_letter = "0-9" if name[0].isnumeric() else name[0]
    url = v[1].replace(" ", "%20")
    year = v[2]

    line = f"- **{first_letter}**: [{name}]({url})"

    # Only append the year when the path segment really was a year.
    if year.isnumeric() and int(year) < 3000:
        line += f" ({year})"

    line += "\n\n"

    all_links.append((name, line))
    movies_text += line

with open(folder / "movies.md", "w") as fp:
    fp.write(movies_text)

all_links.sort(key=lambda x: x[0])

# "Almas Movie archive": the combined page with every collected link.
all_text = "# آرشیو الماسمووی \n\n\n"
all_text += "مرتب شده بر اساس حروف الفبا.\n\n"
all_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
all_text += "---\n"

for v in all_links:
    all_text += v[1]

with open(folder / "all.md", "w") as fp:
    fp.write(all_text)
```
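For orientation, a minimal sketch of the path slicing `process_series` performs; the sample URL below is hypothetical, only its `/<section>/<letter>/<show>/...` shape matters (`process_movies` does the analogous thing with negative indices from the filename end):

```python
from urllib.parse import urlparse

# Hypothetical link, shaped like the crawler output the indexer expects.
link = "https://berlin.saymyname.website/Serial/B/Breaking_Bad/S01/E01.mkv"
parts = urlparse(link).path.split("/")
# parts == ['', 'Serial', 'B', 'Breaking_Bad', 'S01', 'E01.mkv']
print(parts[2])             # 'B'  -> the letter bucket used as the dict key
print("/".join(parts[:4]))  # '/Serial/B/Breaking_Bad' -> one URL kept per show
```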
data/dl1.ehsansub.sbs-all.txt (new file, 10993 lines; diff suppressed because it is too large)
data/dl1.ehsansub.sbs-series.txt (new file, 4087 lines; diff suppressed because it is too large)
data/dl2.ehsansub.sbs-all.txt (new file, 4936 lines; diff suppressed because it is too large)
data/dl2.ehsansub.sbs-series.txt (new file, 1561 lines; diff suppressed because it is too large)
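These dumps are presumably plain one-URL-per-line lists, given how the indexer consumes its `data/` files via `readlines()`. A quick sanity check against the line counts listed above (a sketch; path and count taken from this commit's file list):

```python
from pathlib import Path

# Should print 4087, matching the file listing above.
urls = Path("data/dl1.ehsansub.sbs-series.txt").read_text().splitlines()
print(len(urls))
```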
main.py (31 lines changed)

```diff
@@ -5,11 +5,12 @@ import aiohttp
 from aiohttp import ClientError
 
 folder_hyperlink_pat: re.Pattern = re.compile(
-    r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>\s*\<code\>"
+    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
 )
 
 movie_hyperlink_pat: re.Pattern = re.compile(
-    r"\<a href=\"\/?([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>"
+    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>",
+    re.IGNORECASE,
 )
```
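The regex change is the heart of this hunk: the folder pattern now accepts nested segments, dots, and brackets, and both patterns match case-insensitively. A hedged before/after check (the HTML snippet is made up to mimic an autoindex row, not taken from the site):

```python
import re

old_pat = re.compile(r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>\s*\<code\>")
new_pat = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
)

html = '<A HREF="Some.Show.[1080p]/"> <code>'
print(old_pat.findall(html))  # [] - uppercase tag, dots and brackets don't match
print(new_pat.findall(html))  # ['Some.Show.[1080p]/']
```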
```diff
@@ -22,32 +23,39 @@ async def fetch(url, tries=0):
         print(f"Failed ({tries + 1})...", end="")
         if tries >= 5:
             raise RuntimeError(f"Failed to fetch URL {url} after 6 tries")
-        return fetch(url, tries + 1)
+        return await fetch(url, tries + 1)
     except Exception as e:
         print(f"Unexpected error: {e}")
         return None
 
 
 def join_url(a: str, b: str) -> str:
-    return a.rstrip("/") + "/" + b.lstrip("/")
+    if b.startswith("/"):
+        return a[: a.rfind(".") + a[a.rfind(".") :].find("/")] + b
+    return a.rstrip("/") + "/" + b
 
 
 async def traverse(pool: list[str], url: str, verbose=False) -> None:
     page = await fetch(url)
-    folders = re.findall(folder_hyperlink_pat, page)
-    files = re.findall(movie_hyperlink_pat, page)
+    folders = folder_hyperlink_pat.findall(page)
+    files = movie_hyperlink_pat.findall(page)
 
     for f in folders:
+        if f in url:
+            continue
+        if isinstance(f, tuple):
+            f = f[-1]
         if verbose:
             print(f" -> {join_url(url, f)}")
         await traverse(pool, join_url(url, f), verbose=verbose)
     for f in files:
-        file_name = f[0]
+        file_name = f[-2]
         if verbose:
             print(f"{join_url(url, file_name)} ({len(pool)})")
         pool.append(join_url(url, file_name))
 
 
-URLS: list[str] = ["https://berlin.saymyname.website/Movies/"]
+URLS: list[str] = ["https://dl1.ehsansub.sbs/"]
 
 movies: list[str] = []
```
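Two behavioral fixes here are easy to miss. `return fetch(...)` without `await` handed back an un-awaited coroutine object on retry rather than the page body, so every retried fetch produced garbage; and `join_url` now treats a leading slash in `b` as root-relative instead of gluing it onto the current folder. A sketch of the new join logic, with a hypothetical URL:

```python
a = "https://dl1.ehsansub.sbs/Movies/2020/"
b = "/Series/Foo/"

# Index of the first "/" after the last "." in `a`, i.e. the end of the host.
host_end = a.rfind(".") + a[a.rfind(".") :].find("/")
print(a[:host_end] + b)              # https://dl1.ehsansub.sbs/Series/Foo/
print(a.rstrip("/") + "/" + "Bar/")  # relative case unchanged: .../2020/Bar/
```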
```diff
@@ -74,10 +82,13 @@ async def main():
             f"Is {url[url.find('/') + 2 : url.rfind('.') + url[url.rfind('.') :].find('/')]} fancy?"
         )
         if not is_fancy:
-            folder_hyperlink_pat = re.compile(r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>")
+            folder_hyperlink_pat = re.compile(
+                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>", re.IGNORECASE
+            )
             movie_hyperlink_pat = re.compile(
-                r"\<a href=\"\/?([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>"
+                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
+                re.IGNORECASE,
             )
         tasks.append(traverse(movies, url, is_verbose))
     await asyncio.gather(*tasks)
```
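Why `f[-2]` and the `isinstance` guard in `traverse`: once a pattern has more than one capture group, `Pattern.findall` returns a tuple per match, and the filename is now the second-to-last group (the last one is the extension). A sketch against made-up HTML:

```python
import re

movie_pat = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
    re.IGNORECASE,
)

m = movie_pat.findall('<a href="2020/Some.Movie.2020.mkv">')
print(m)         # [('2020/', 'Some.Movie.2020.mkv', 'mkv')]
print(m[0][-2])  # 'Some.Movie.2020.mkv' - the filename group
```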
view/.gitkeep (new file, 0 lines)
view/almasmovie/all.md (new file, 14484 lines; diff suppressed because it is too large)
view/almasmovie/movies.md (new file, 13180 lines; diff suppressed because it is too large)
view/almasmovie/series.md (new file, 1312 lines; diff suppressed because it is too large)