Compare commits

...

27 Commits

Author SHA1 Message Date
2ea277deff Include . (dot) in regex pattern for directories 2026-01-16 21:04:57 +00:00
1ed1cb9194 Add all links for almas movie 2026-01-15 09:02:18 +03:30
0ae1356f2d Update indexer and movies.md 2026-01-15 08:43:20 +03:30
f5c6f7fe5d Update README.md 2026-01-15 08:42:29 +03:30
173397b0a3 Add indexing for movies 2026-01-15 08:41:20 +03:30
074a8e904e Add files for EhsanFilm 2026-01-14 16:29:09 +03:30
d6d1831198 Update almasmovie_indexer.py 2026-01-14 15:45:51 +03:30
b1813da978 Update README.md 2026-01-14 15:45:41 +03:30
6ce1c4437b Update series.md 2026-01-14 15:45:33 +03:30
6403265443 Rename indexer to almasmovie_indexer 2026-01-14 15:38:09 +03:30
33d78a7a33 Add series.md for almas movie 2026-01-14 15:32:46 +03:30
fba8464dae Add indexer for almas movie series 2026-01-14 15:32:27 +03:30
fe87dd1451 Add more files to data folder 2026-01-14 13:15:07 +03:30
e4007cdbc1 Better handling 2026-01-14 08:10:04 +00:00
c90a998a5d Fix join_url for absolute paths 2026-01-14 08:09:34 +00:00
f7fa3d9788 Fix fetch retry 2026-01-14 08:08:47 +00:00
efa2e7249a Merge pull request 'Add IGNORECASE flag to regex' (#1) from 0880/movies:master into master (Reviewed-on: #1) 2026-01-14 07:30:23 +00:00
56273f1836 Add IGNORECASE flag to regex (author: 0880) 2026-01-14 10:57:38 +03:30
dd33fc5743 Update README.md 2026-01-14 10:14:47 +03:30
d23c1740db Update README.md 2026-01-14 10:14:00 +03:30
23005f1050 Create archives.txt 2026-01-14 10:12:23 +03:30
3e9ac476f9 Add data folder 2026-01-14 10:11:57 +03:30
ad6a37a74e Update README.md 2026-01-14 09:47:42 +03:30
4ee68db2ec Update README.md 2026-01-14 09:23:59 +03:30
2dc01bd130 Update README.md 2026-01-14 09:02:49 +03:30
a78647076e Add requirements.txt and main.py 2026-01-14 08:10:44 +03:30
1b0fff5e65 Update README.md 2026-01-14 08:10:24 +03:30
18 changed files with 110057 additions and 2 deletions


@@ -1,3 +1,35 @@
# movies - a collection of sorted movie archives
It is best to use your browser's `find in page` feature to search the archives.
## Currently available archives:
### 🍿 Donyaye Serial
- *Dubbed* and *SoftSub*
- Links:
- **All links**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/donyaye_serial_all_archive.html)
- **Top 1000 Series**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_1000_series.html)
- **Top 5000 Movies**: [Open](https://dls2.iran-gamecenter-host.com/DonyayeSerial/top_5000_movies.html)
### 💎 Almas Movie
- Links:
- **Series**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/series.md)
- **Movies**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/movies.md)
- **All Links**: [Open](http://chai.bokhary.ir/logique/movies/src/branch/master/view/almasmovie/all.md)
## Link extractor
The links in the archives above were crawled and extracted with a script. If you like, you can use that script on any similar archive.
### Installing the prerequisites and using the script:
**Clone and fetch the files with git:**
```bash
git clone http://chai.bokhary.ir/logique/movies.git
```
**Install the packages with:**
```bash
pip install -i https://mirror-pypi.runflare.com/simple -r requirements.txt
```
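
The README stops at the install step; presumably the crawler is run first and the indexer afterwards. A minimal sketch of that sequence, assuming the scripts are run from the repository root:

```bash
# Crawl the archive roots listed in main.py (interactive prompts ask about
# verbosity and page style); writes the collected links to movies.txt.
python main.py

# Rebuild view/almasmovie/{series,movies,all}.md from the files in data/.
python almasmovie_indexer.py
```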

almasmovie_indexer.py (Normal file, 115 lines)

@@ -0,0 +1,115 @@
```python
from glob import glob
from urllib.parse import urlparse
from pathlib import Path

series_links_dict = {}
movies_links_dict = {}
all_links = []
folder = Path("view/almasmovie")


def process_series(link: str):
    """Bucket a series link under its first letter, one URL per series."""
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    first_letter = splitted_path[2]
    if first_letter not in series_links_dict:
        series_links_dict[first_letter] = []
    # Truncate the path to the series folder itself.
    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"
    if url not in series_links_dict[first_letter]:
        series_links_dict[first_letter].append(url)


def process_movies(link: str):
    """Record one entry per movie folder: display name, folder URL, year."""
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    folder_name = splitted_path[-2]
    if folder_name not in movies_links_dict:
        year = splitted_path[-3]
        name = splitted_path[-1].replace("_", " ").replace(".", " ")
        joined_path = "/".join(splitted_path[: len(splitted_path) - 1])
        movies_links_dict[folder_name] = (
            name,
            f"{parsed.scheme}://{parsed.hostname}{joined_path}",
            year,
            folder_name,
        )


# Iterate over the crawled series links.
for file in glob("data/*.saymyname.website-series.txt"):
    with open(file, "r") as fp:
        for link in fp.readlines():
            process_series(link.strip())

# Iterate over the crawled movie links.
for file in glob("data/*.saymyname.website-movies.txt"):
    with open(file, "r") as fp:
        for link in fp.readlines():
            process_movies(link.strip())

# Persian header: "Almas Movie archive (series only) / Sorted alphabetically. /
# Use your browser's `find in page` feature to search."
series_text = "# آرشیو الماس‌مووی (فقط سریال) \n\n\n"
series_text += "مرتب شده بر اساس حروف الفبا.\n\n"
series_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
series_text += "---\n"
for k, v in series_links_dict.items():
    for link in v:
        parsed_link = urlparse(link)
        name = parsed_link.path.split("/")[-1]
        line = f"- **{k}**: [{name}]({link.replace(' ', '%20')})\n\n"
        all_links.append((name, line))
        series_text += line
with open(folder / "series.md", "w") as fp:
    fp.write(series_text)

# Same headers as above, with "(movies only)".
movies_text = "# آرشیو الماس‌مووی (فقط فیلم) \n\n\n"
movies_text += "مرتب شده بر اساس حروف الفبا.\n\n"
movies_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
movies_text += "---\n"
for v in sorted(movies_links_dict.values(), key=lambda x: x[0]):
    name = v[0]
    first_letter = "0-9" if name[0].isnumeric() else name[0]
    url = v[1].replace(" ", "%20")
    year = v[2]
    line = f"- **{first_letter}**: [{name}]({url})"
    # Append the year only when the path segment is plausibly a year.
    if year.isnumeric() and int(year) < 3000:
        line += f" ({year})"
    line += "\n\n"
    all_links.append((name, line))
    movies_text += line
with open(folder / "movies.md", "w") as fp:
    fp.write(movies_text)

# Combined, alphabetically sorted view of everything indexed above.
all_links.sort(key=lambda x: x[0])
all_text = "# آرشیو الماس‌مووی \n\n\n"
all_text += "مرتب شده بر اساس حروف الفبا.\n\n"
all_text += "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
all_text += "---\n"
for v in all_links:
    all_text += v[1]
with open(folder / "all.md", "w") as fp:
    fp.write(all_text)
```
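
To make the bucketing concrete, here is a minimal sketch of what `process_series` computes for one crawled link; the sample URL is hypothetical, shaped after the saymyname.website entries in archives.txt:

```python
from urllib.parse import urlparse

# Hypothetical crawled link from a *.saymyname.website-series.txt file:
link = "https://tokyo.saymyname.website/Series/D/Dark/S01/Dark.S01E01.mkv"

parsed = urlparse(link)
parts = parsed.path.split("/")   # ['', 'Series', 'D', 'Dark', 'S01', 'Dark.S01E01.mkv']
first_letter = parts[2]          # 'D': the alphabetical bucket
series_url = f"{parsed.scheme}://{parsed.hostname}{'/'.join(parts[:4])}"
print(first_letter, series_url)  # D https://tokyo.saymyname.website/Series/D/Dark
```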

archives.txt (Normal file, 8 lines)

@@ -0,0 +1,8 @@
https://dls2.iran-gamecenter-host.com/DonyayeSerial/donyaye_serial_all_archive.html
https://berlin.saymyname.website/Movies
https://tokyo.saymyname.website/Movies
https://nairobi.saymyname.website/Movies
https://tokyo.saymyname.website/Series
https://nairobi.saymyname.website/Series
https://dl1.ehsansub.sbs/
https://dl2.ehsansub.sbs/
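
main.py hard-codes a single root in `URLS`; presumably the other roots above were crawled the same way, one at a time. A hedged sketch of loading them all instead (assumes archives.txt sits next to the script):

```python
# Hypothetical: replace main.py's hard-coded URLS with the roots
# listed in archives.txt, skipping blank lines.
with open("archives.txt", encoding="utf-8") as fp:
    URLS = [line.strip() for line in fp if line.strip()]
```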

data/dl1.ehsansub.sbs-all.txt (Normal file, 10993 lines; diff suppressed because it is too large)

(Diffs for the eight remaining data files are likewise suppressed because they are too large.)
main.py (Normal file, 100 lines)

@@ -0,0 +1,100 @@
```python
import asyncio
import re

import aiohttp
from aiohttp import ClientError

# Directory links on a "fancy" index page (the anchor wraps a <code> tag).
folder_hyperlink_pat: re.Pattern = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>\s*\<code\>", re.IGNORECASE
)
# Video-file links; the last path segment must end in a known extension.
movie_hyperlink_pat: re.Pattern = re.compile(
    r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>\s*\<code\>",
    re.IGNORECASE,
)


async def fetch(url, tries=0):
    """GET a page as text, retrying up to five times on client errors."""
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                return await response.text()
    except ClientError:
        print(f"Failed ({tries + 1})...", end="")
        if tries >= 5:
            raise RuntimeError(f"Failed to fetch URL {url} after 6 tries")
        return await fetch(url, tries + 1)
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None


def join_url(a: str, b: str) -> str:
    # An absolute path replaces everything after the host; the slice ends at
    # the first "/" that follows the last "." in the URL (i.e. after the TLD).
    if b.startswith("/"):
        return a[: a.rfind(".") + a[a.rfind(".") :].find("/")] + b
    return a.rstrip("/") + "/" + b


async def traverse(pool: list[str], url: str, verbose=False) -> None:
    """Recursively walk a directory listing, collecting video links in pool."""
    page = await fetch(url)
    if page is None:  # give up on pages that failed with an unexpected error
        return
    folders = folder_hyperlink_pat.findall(page)
    files = movie_hyperlink_pat.findall(page)
    for f in folders:
        if isinstance(f, tuple):
            f = f[-1]
        if f in url:  # skip self/parent links to avoid infinite recursion
            continue
        if verbose:
            print(f" -> {join_url(url, f)}")
        await traverse(pool, join_url(url, f), verbose=verbose)
    for f in files:
        file_name = f[-2]  # second-to-last group is the bare file name
        if verbose:
            print(f"{join_url(url, file_name)} ({len(pool)})")
        pool.append(join_url(url, file_name))


URLS: list[str] = ["https://dl1.ehsansub.sbs/"]
movies: list[str] = []


def ask(q: str, default=True) -> bool:
    """Yes/no prompt; an empty answer returns the default."""
    y = "Y" if default else "y"
    n = "n" if default else "N"
    while True:
        a = input(f"{q} [{y}|{n}]: ").lower()
        if not a:
            return default
        if a == "y" or a == "n":
            return a == "y"


is_verbose = ask("Verbose?")


async def main():
    global folder_hyperlink_pat, movie_hyperlink_pat
    tasks = []
    for url in URLS:
        # "Fancy" listings wrap the link text in <code>; plain ones do not,
        # so the patterns are swapped for the simpler markup. The slice below
        # extracts the bare host name from the URL for the prompt.
        is_fancy = ask(
            f"Is {url[url.find('/') + 2 : url.rfind('.') + url[url.rfind('.') :].find('/')]} fancy?"
        )
        if not is_fancy:
            folder_hyperlink_pat = re.compile(
                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)+\"\>", re.IGNORECASE
            )
            movie_hyperlink_pat = re.compile(
                r"\<a href=\"(\/?[a-zA-Z0-9_ \[\].-]+\/)*([a-zA-Z0-9_. -]+\.?(mp4|mkv|avi|mov|wmv|webm))\"\>",
                re.IGNORECASE,
            )
        tasks.append(traverse(movies, url, is_verbose))
    await asyncio.gather(*tasks)


if __name__ == "__main__":
    asyncio.run(main())
    # Persist everything collected once the crawl finishes.
    with open("movies.txt", mode="w", encoding="utf-8") as fp:
        fp.write("\n".join(movies))
```

requirements.txt (Normal file, 10 lines)

@@ -0,0 +1,10 @@
aiohappyeyeballs==2.6.1
aiohttp==3.13.3
aiosignal==1.4.0
attrs==25.4.0
frozenlist==1.8.0
idna==3.11
multidict==6.7.0
propcache==0.4.1
typing_extensions==4.15.0
yarl==1.22.0

view/.gitkeep (Normal file, empty)

view/almasmovie/all.md (Normal file, 14484 lines; diff suppressed because it is too large)

view/almasmovie/movies.md (Normal file, 13180 lines; diff suppressed because it is too large)

view/almasmovie/series.md (Normal file, 1312 lines; diff suppressed because it is too large)