forked from logique/movies

Compare commits

17 Commits

Author SHA1 Message Date
2ea277deff Include . (dot) in regex pattern for directories 2026-01-16 21:04:57 +00:00
1ed1cb9194 Add all links for almas movie 2026-01-15 09:02:18 +03:30
0ae1356f2d Update indexer and movies.md 2026-01-15 08:43:20 +03:30
f5c6f7fe5d Update README.md 2026-01-15 08:42:29 +03:30
173397b0a3 Add indexing for movies 2026-01-15 08:41:20 +03:30
074a8e904e Add files for EhsanFilm 2026-01-14 16:29:09 +03:30
d6d1831198 Update almasmovie_indexer.py 2026-01-14 15:45:51 +03:30
b1813da978 Update README.md 2026-01-14 15:45:41 +03:30
6ce1c4437b Update series.md 2026-01-14 15:45:33 +03:30
6403265443 Rename indexer to almasmovie_indexer 2026-01-14 15:38:09 +03:30
33d78a7a33 Add series.md for almas movie 2026-01-14 15:32:46 +03:30
fba8464dae Add indexer for almas movie series 2026-01-14 15:32:27 +03:30
fe87dd1451 Add more files to data folder 2026-01-14 13:15:07 +03:30
e4007cdbc1 Better handling 2026-01-14 08:10:04 +00:00
c90a998a5d Fix join_url for absolute paths 2026-01-14 08:09:34 +00:00
f7fa3d9788 Fix fetch retry 2026-01-14 08:08:47 +00:00
efa2e7249a Merge pull request 'Add IGNORECASE flag to regex' (#1) from 0880/movies:master into master
Reviewed-on: logique/movies#1
2026-01-14 07:30:23 +00:00
11 changed files with 50692 additions and 12 deletions

README.md

@@ -1,9 +1,9 @@
-# movies - مجموعه آٰرشیو های مرتب شده فیلم
+# movies - مجموعه آرشیو های مرتب شده فیلم
 It is better to use your browser's `find in page` feature to search the archives.
 ## Currently available archives:
-### Donyaye Serial
+### 🍿 Donyaye Serial
 - *Dubbed* and *SoftSub*
 - Links:
@@ -11,11 +11,16 @@
     - **Top 1000 Series**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_1000_series.html)
     - **Top 5000 Movies**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_5000_movies.html)
+### 💎 Almas Movie
+- Links:
+    - **Series**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/series.md)
+    - **Movies**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/movies.md)
+    - **All Links**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/all.md)
 ## Link extractor
-The links of the archives above were found and extracted using a script. If you wish, you can use that script on any similar archive
+The links of the archives above were found and extracted using a script. If you wish, you can use that script on any similar archive.
 ### How to install the prerequisites and use the script:

almasmovie_indexer.py (new file, 115 lines)

@@ -0,0 +1,115 @@
from glob import glob
from urllib.parse import urlparse
from pathlib import Path

# Collected output: series grouped by first letter, movies keyed by folder name,
# and a flat list used to build the combined page.
series_links_dict = {}
movies_links_dict = {}
all_links = []
folder = Path("view/almasmovie")


def process_series(link: str):
    # Group series by their single-letter directory and keep one URL per show folder.
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    first_letter = splitted_path[2]
    first_letter_list = series_links_dict.get(first_letter)
    if not first_letter_list:
        series_links_dict[first_letter] = list()
    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"
    if url not in series_links_dict[first_letter]:
        series_links_dict[first_letter].append(url)


def process_movies(link: str):
    # Keep one entry per movie folder: (display name, folder URL, year, folder name).
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    folder_name = splitted_path[-2]
    if not movies_links_dict.get(folder_name):
        year = splitted_path[-3]
        name = splitted_path[-1].replace("_", " ").replace(".", " ")
        joined_path = "/".join(splitted_path[: len(splitted_path) - 1])
        movies_links_dict[folder_name] = (
            name,
            f"{parsed.scheme}://{parsed.hostname}{joined_path}",
            year,
            folder_name,
        )


# Iterating series
for file in glob("data/*.saymyname.website-series.txt"):
    with open(file, "r") as fp:
        links = fp.readlines()
    for link in links:
        process_series(link)

# Iterating movies
for file in glob("data/*.saymyname.website-movies.txt"):
    with open(file, "r") as fp:
        links = fp.readlines()
    for link in links:
        process_movies(link)

# series.md -- Persian header: "Almas Movie archive (series only)", sorted
# alphabetically, with a hint to use the browser's find-in-page feature.
series_text = "# آرشیو الماس‌مووی (فقط سریال) \n\n\n"
series_text += "مرتب شده بر اساس حروف الفبا.\n\n"
series_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
series_text += "---\n"
for k, v in series_links_dict.items():
    for link in v:
        parsed_link = urlparse(link)
        name = parsed_link.path.split("/")[-1]
        line = f"- **{k}**: [{name}]({link.replace(' ', '%20')})\n\n"
        all_links.append((name, line))
        series_text += line
with open(folder / "series.md", "w") as fp:
    fp.write(series_text)

# movies.md -- Persian header: "Almas Movie archive (movies only)".
movies_text = "# آرشیو الماس‌مووی (فقط فیلم) \n\n\n"
movies_text += "مرتب شده بر اساس حروف الفبا.\n\n"
movies_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
movies_text += "---\n"
for v in sorted(movies_links_dict.values(), key=lambda x: x[0]):
    name = v[0]
    first_letter = "0-9" if name[0].isnumeric() else name[0]
    url = v[1].replace(" ", "%20")
    year = v[2]
    line = f"- **{first_letter}**: [{name}]({url})"
    if year.isnumeric() and int(year) < 3000:
        line += f" ({year})"
    line += "\n\n"
    all_links.append((name, line))
    movies_text += line
with open(folder / "movies.md", "w") as fp:
    fp.write(movies_text)

# all.md -- Persian header: "Almas Movie archive", series and movies combined.
all_links.sort(key=lambda x: x[0])
all_text = "# آرشیو الماس‌مووی \n\n\n"
all_text += "مرتب شده بر اساس حروف الفبا.\n\n"
all_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
all_text += "---\n"
for v in all_links:
    all_text += v[1]
with open(folder / "all.md", "w") as fp:
    fp.write(all_text)
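
For orientation: the indexer leans on a fixed URL layout on the mirror, roughly scheme://host/Series/<letter>/<show folder>/... for series and .../<year>/<movie folder>/<file> for movies. A minimal sketch of what process_series derives from one raw link; the host, path segments, and show name below are hypothetical, not taken from the archive:

```python
from urllib.parse import urlparse

# Hypothetical entry from one of the data/*-series.txt dumps.
link = "http://dl.example-host.ir/Series/A/Avatar The Last Airbender/S01/E01.720p.mkv"

parsed = urlparse(link)
parts = parsed.path.split("/")
# parts == ['', 'Series', 'A', 'Avatar The Last Airbender', 'S01', 'E01.720p.mkv']

first_letter = parts[2]  # 'A' -> bucket key in series_links_dict
series_url = f"{parsed.scheme}://{parsed.hostname}{'/'.join(parts[:4])}"
# 'http://dl.example-host.ir/Series/A/Avatar The Last Airbender'

# series.md would then receive a line like:
#   - **A**: [Avatar The Last Airbender](http://dl.example-host.ir/Series/A/Avatar%20The%20Last%20Airbender)
print(first_letter, series_url)
```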

data/dl1.ehsansub.sbs-all.txt (new file, 10993 lines)

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

main.py (25 changed lines)

@@ -5,11 +5,11 @@ import aiohttp
 from aiohttp import ClientError
 folder_hyperlink_pat: re.Pattern = re.compile(
-    r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>\s*\<code\>", re.IGNORECASE
+    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
 )
 movie_hyperlink_pat: re.Pattern = re.compile(
-    r"\<a href=\"\/?([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>",
+    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>",
     re.IGNORECASE,
 )
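
Side note on this hunk: the widened character class now accepts directory names containing dots and brackets, which the old pattern skipped entirely. A quick check against a made-up row of a fancy-index listing (the HTML snippet is illustrative, not taken from a real mirror):

```python
import re

# The post-change directory pattern from the hunk above.
folder_hyperlink_pat = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
)

# Hypothetical listing row; the folder name contains dots and brackets.
row = '<a href="The.Office.US.[2005]/"> <code>The.Office.US.[2005]</code></a>'

print(folder_hyperlink_pat.findall(row))  # ['The.Office.US.[2005]/']
# The pre-change class ([a-zA-Z0-9_ -] only) matches nothing in this row.
```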
@@ -23,26 +23,33 @@ async def fetch(url, tries=0):
print(f"Failed ({tries + 1})...", end="")
if tries >= 5:
raise RuntimeError(f"Failed to fetch URL {url} after 6 tries")
return fetch(url, tries + 1)
return await fetch(url, tries + 1)
except Exception as e:
print(f"Unexpected error: {e}")
return None
def join_url(a: str, b: str) -> str:
return a.rstrip("/") + "/" + b.lstrip("/")
if b.startswith("/"):
return a[: a.rfind(".") + a[a.rfind(".") :].find("/")] + b
return a.rstrip("/") + "/" + b
async def traverse(pool: list[str], url: str, verbose=False) -> None:
page = await fetch(url)
folders = re.findall(folder_hyperlink_pat, page)
files = re.findall(movie_hyperlink_pat, page)
folders = folder_hyperlink_pat.findall(page)
files = movie_hyperlink_pat.findall(page)
for f in folders:
if f in url:
continue
if isinstance(f, tuple):
f = f[-1]
if verbose:
print(f" -> {join_url(url, f)}")
await traverse(pool, join_url(url, f), verbose=verbose)
for f in files:
file_name = f[0]
file_name = f[-2]
if verbose:
print(f"{join_url(url, file_name)} ({len(pool)})")
pool.append(join_url(url, file_name))
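
The single-word fix in fetch is easy to gloss over: without the await, the retry branch hands the caller an un-awaited coroutine object instead of the page text. A toy illustration of the control flow (not the project's fetch; the URL is made up):

```python
import asyncio

async def fetch(url, tries=0):
    # Stand-in for the real fetch: pretend the first two attempts fail.
    if tries < 2:
        # Pre-fix this read `return fetch(url, tries + 1)`, which would return a
        # coroutine object and the retry would never actually run.
        return await fetch(url, tries + 1)
    return f"<html>listing of {url}</html>"

print(asyncio.run(fetch("https://dl1.host.example/")))
# <html>listing of https://dl1.host.example/</html>
```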
@@ -76,11 +83,11 @@ async def main():
         )
         if not is_fancy:
             folder_hyperlink_pat = re.compile(
-                r"\<a href=\"\/?([a-zA-Z0-9_ -]+\/)\"\>", re.IGNORECASE
+                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>", re.IGNORECASE
             )
             movie_hyperlink_pat = re.compile(
-                r"\<a href=\"\/?([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
+                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
                 re.IGNORECASE,
             )
         tasks.append(traverse(movies, url, is_verbose))
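
The join_url rewrite is what the "Fix join_url for absolute paths" commit refers to: relative hrefs are still appended to the current directory, while hrefs starting with "/" now replace the whole path and are re-rooted at the host (located as the first "/" after the last "." in the URL). A self-contained sketch of that behaviour with made-up URLs:

```python
def join_url(a: str, b: str) -> str:
    # Behaviour of the post-fix version shown in the diff above.
    if b.startswith("/"):
        return a[: a.rfind(".") + a[a.rfind(".") :].find("/")] + b
    return a.rstrip("/") + "/" + b

base = "https://dl1.host.example/Series/A/"  # hypothetical directory listing

# Relative href: appended to the current directory, as before.
assert join_url(base, "Avatar/") == "https://dl1.host.example/Series/A/Avatar/"

# Absolute href: re-rooted at the host; the old one-liner would have produced
# https://dl1.host.example/Series/A/Movies/2020/Film.mkv instead.
assert join_url(base, "/Movies/2020/Film.mkv") == "https://dl1.host.example/Movies/2020/Film.mkv"
```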

view/.gitkeep (new empty file)

view/almasmovie/all.md (new file, 14484 lines)

File diff suppressed because it is too large.

view/almasmovie/movies.md (new file, 13180 lines)

File diff suppressed because it is too large.

view/almasmovie/series.md (new file, 1312 lines)

File diff suppressed because it is too large.