# Files
# movies/almasmovie_indexer.py
# 2026-01-15 09:02:18 +03:30
#
# 116 lines
# 3.3 KiB
# Python

from glob import glob
from urllib.parse import urlparse
from pathlib import Path
# Directory that receives the generated Markdown pages.
folder = Path("view/almasmovie")

# (name, Markdown line) pairs gathered from both the series and movie pages.
all_links = list()

# Movie folder name -> (display name, folder URL, year part, folder name).
movies_links_dict = dict()

# Grouping key (path part at index 2) -> list of distinct series URLs.
series_links_dict = dict()
def process_series(link: str):
    """Record the series URL that *link* belongs to.

    The URL path is split on ``/`` and truncated to its first four parts
    (the leading empty part plus three segments). The resulting URL is stored
    in ``series_links_dict`` under the part at index 2, once per distinct URL.

    Args:
        link: One line of a series link file. Surrounding whitespace,
            including the trailing newline left by line-based reading, is
            ignored.

    Returns:
        None. Blank lines and paths that split into fewer than three parts
        are skipped instead of raising ``IndexError``.
    """
    link = link.strip()
    if not link:
        return

    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    if len(splitted_path) < 3:
        # Too shallow to contain the grouping part at index 2.
        return

    first_letter = splitted_path[2]
    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"

    # A list (not a set) keeps the first-seen order of the URLs.
    known_urls = series_links_dict.setdefault(first_letter, [])
    if url not in known_urls:
        known_urls.append(url)
def process_movies(link: str):
    """Register the movie folder that *link* points into.

    The URL path is split on ``/``. The second-to-last part is the folder
    name and the key in ``movies_links_dict``; only the first link seen for a
    folder is kept. The stored value is the tuple
    ``(name, folder_url, year, folder_name)`` where *name* is the last path
    part with ``_`` and ``.`` replaced by spaces, *folder_url* is the URL
    without its last path part, and *year* is the third-to-last path part.

    Args:
        link: One line of a movie link file. Surrounding whitespace,
            including the trailing newline left by line-based reading, is
            ignored.

    Returns:
        None. Blank lines and paths that split into fewer than three parts
        are skipped instead of raising ``IndexError``.
    """
    link = link.strip()
    if not link:
        return

    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    if len(splitted_path) < 3:
        # Needs at least year / folder / file parts for the indexes below.
        return

    folder_name = splitted_path[-2]
    if movies_links_dict.get(folder_name):
        # First link for a folder wins; later files of the same folder are ignored.
        return

    year = splitted_path[-3]
    name = splitted_path[-1].replace("_", " ").replace(".", " ")
    joined_path = "/".join(splitted_path[:-1])
    movies_links_dict[folder_name] = (
        name,
        f"{parsed.scheme}://{parsed.hostname}{joined_path}",
        year,
        folder_name,
    )
# Feed every collected link file through the matching processor.
# glob() returns matches in arbitrary order, so they are sorted to keep
# "first link wins" decisions and the generated pages stable between runs.
# NOTE(review): the link files are assumed to be UTF-8 - confirm against the
# tool that produces them.
for file in sorted(glob("data/*.saymyname.website-series.txt")):
    with open(file, "r", encoding="utf-8") as fp:
        # Iterate the file lazily instead of loading every line up front.
        for link in fp:
            process_series(link)

for file in sorted(glob("data/*.saymyname.website-movies.txt")):
    with open(file, "r", encoding="utf-8") as fp:
        for link in fp:
            process_movies(link)
# Build series.md: one bullet per series URL, labelled with its grouping key.
series_parts = [
    "# آرشیو الماس‌مووی (فقط سریال) \n\n\n",
    "مرتب شده بر اساس حروف الفبا.\n\n",
    "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
    "---\n",
]
# The page header states that the list is alphabetical, so both the grouping
# keys and the series names inside each group are sorted here rather than
# emitted in whatever order the links happened to be read.
for k in sorted(series_links_dict):
    named_links = sorted(
        (urlparse(link).path.split("/")[-1], link)
        for link in series_links_dict[k]
    )
    for name, link in named_links:
        # Spaces are percent-encoded so the Markdown link target stays intact.
        line = f"- **{k}**: [{name}]({link.replace(' ', '%20')})\n\n"
        all_links.append((name, line))
        series_parts.append(line)
series_text = "".join(series_parts)

# The page contains Persian text, so the encoding is fixed to UTF-8 instead
# of depending on the platform default.
with open(folder / "series.md", "w", encoding="utf-8") as fp:
    fp.write(series_text)
# Build movies.md: one bullet per movie folder, sorted by display name.
movies_parts = [
    "# آرشیو الماس‌مووی (فقط فیلم) \n\n\n",
    "مرتب شده بر اساس حروف الفبا.\n\n",
    "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
    "---\n",
]
# An entry built from a link ending in "/" has an empty display name; fall
# back to its folder name (last tuple element) so name[0] cannot fail.
movie_entries = sorted(
    (
        (entry[0] or entry[3], entry[1], entry[2])
        for entry in movies_links_dict.values()
    ),
    key=lambda x: x[0],
)
for name, raw_url, year in movie_entries:
    if not name:
        # Neither a file name nor a folder name: nothing to show as a label.
        continue
    first_letter = "0-9" if name[0].isnumeric() else name[0]
    url = raw_url.replace(" ", "%20")
    line = f"- **{first_letter}**: [{name}]({url})"
    # isdecimal() only accepts characters int() can parse; isnumeric() also
    # accepts ones such as superscripts and fractions that make int() raise.
    if year.isdecimal() and int(year) < 3000:
        line += f" ({year})"
    line += "\n\n"
    all_links.append((name, line))
    movies_parts.append(line)
movies_text = "".join(movies_parts)

# The page contains Persian text, so the encoding is fixed to UTF-8 instead
# of depending on the platform default.
with open(folder / "movies.md", "w", encoding="utf-8") as fp:
    fp.write(movies_text)
# Build all.md: every collected line (series and movies) ordered by name.
all_links.sort(key=lambda x: x[0])
all_text = "".join(
    [
        "# آرشیو الماس‌مووی \n\n\n",
        "مرتب شده بر اساس حروف الفبا.\n\n",
        "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
        "---\n",
        # Each element is a (name, line) pair; only the rendered line is written.
        *(line for _, line in all_links),
    ]
)

# The page contains Persian text, so the encoding is fixed to UTF-8 instead
# of depending on the platform default.
with open(folder / "all.md", "w", encoding="utf-8") as fp:
    fp.write(all_text)