"""Generate the AlmasMovie markdown archive pages from crawled link lists.

Every ``data/*.saymyname.website-series.txt`` and
``data/*.saymyname.website-movies.txt`` file is read line by line (each line
is handed to ``urlparse`` as one link), the links are grouped, and the result
is written to ``series.md`` and ``movies.md`` inside ``folder``.
"""

from glob import glob
from pathlib import Path
from urllib.parse import urlparse

# Grouping key (path segment at index 2 of the link) -> list of unique URLs
# truncated to the first four path segments.
series_links_dict = {}
# Folder name (second-to-last path segment) -> (display name, folder URL,
# year segment, folder name). Only the first link seen per folder is kept.
movies_links_dict = {}
# Output directory for the generated markdown pages.
folder = Path("view/almasmovie")

SERIES_GLOB = "data/*.saymyname.website-series.txt"
MOVIES_GLOB = "data/*.saymyname.website-movies.txt"

# Intro lines shared by both generated pages (emitted verbatim).
_PAGE_INTRO = (
    "مرتب شده بر اساس حروف الفبا.\n\n"
    "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n"
    "---\n"
)


def process_series(link: str):
    """Record the series URL derived from *link* in ``series_links_dict``.

    The link's path is split on ``/``; the segment at index 2 is used as the
    grouping key (the code calls it the "first letter" -- presumably an
    alphabetical bucket, verify against the crawled data) and the first four
    segments form the stored URL. Duplicate URLs are stored once.

    Links that are blank or whose path has fewer than three segments are
    ignored instead of raising ``IndexError``.
    """
    # Lines come from file iteration and still carry their trailing newline.
    parsed = urlparse(link.strip())
    splitted_path = parsed.path.split("/")
    if len(splitted_path) < 3:
        return

    first_letter = splitted_path[2]
    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"

    urls = series_links_dict.setdefault(first_letter, [])
    if url not in urls:
        urls.append(url)


def process_movies(link: str):
    """Record the movie described by *link* in ``movies_links_dict``.

    The last path segment becomes the display name (``_`` and ``.`` replaced
    by spaces), the second-to-last segment is the folder name used as the
    dictionary key, and the third-to-last segment is stored as the year. The
    stored URL is the link without its last path segment. Only the first link
    seen for a folder is kept.

    Links that are blank or whose path has fewer than three segments are
    ignored instead of raising ``IndexError``. When the last segment is empty
    (link ends with ``/``) the folder name is used as the display name so that
    rendering never sees an empty name.
    """
    parsed = urlparse(link.strip())
    splitted_path = parsed.path.split("/")
    if len(splitted_path) < 3:
        return

    folder_name = splitted_path[-2]
    if movies_links_dict.get(folder_name):
        return

    year = splitted_path[-3]
    name = splitted_path[-1].replace("_", " ").replace(".", " ") or folder_name
    if not name:
        return

    joined_path = "/".join(splitted_path[:-1])
    movies_links_dict[folder_name] = (
        name,
        f"{parsed.scheme}://{parsed.hostname}{joined_path}",
        year,
        folder_name,
    )


def build_series_markdown() -> str:
    """Return the markdown page listing every URL in ``series_links_dict``.

    Entries are emitted sorted by grouping key and then by URL, matching the
    page header's statement that the list is alphabetically sorted.
    """
    parts = ["# آرشیو الماس‌مووی (فقط سریال) \n\n\n", _PAGE_INTRO]
    for key in sorted(series_links_dict):
        for link in sorted(series_links_dict[key]):
            name = urlparse(link).path.split("/")[-1]
            parts.append(f"- **{key}**: [{name}]({link.replace(' ', '%20')})\n\n")
    return "".join(parts)


def build_movies_markdown() -> str:
    """Return the markdown page listing every entry of ``movies_links_dict``.

    Entries are sorted by display name. Names starting with a numeric
    character are labelled ``0-9``; the year is appended only when it is a
    decimal number below 3000.
    """
    parts = ["# آرشیو الماس‌مووی (فقط فیلم) \n\n\n", _PAGE_INTRO]
    for name, raw_url, year, _ in sorted(
        movies_links_dict.values(), key=lambda entry: entry[0]
    ):
        first_letter = "0-9" if name[0].isnumeric() else name[0]
        url = raw_url.replace(" ", "%20")
        line = f"- **{first_letter}**: [{name}]({url})"
        # isdecimal() (unlike isnumeric()) only accepts what int() can parse.
        if year.isdecimal() and int(year) < 3000:
            line += f" ({year})"
        parts.append(line + "\n\n")
    return "".join(parts)


def _iter_links(pattern: str):
    """Yield every line of every file matching *pattern*, in sorted file order."""
    # glob() order is arbitrary; sorting keeps "first link wins" deterministic.
    for file in sorted(glob(pattern)):
        with open(file, "r", encoding="utf-8") as fp:
            yield from fp


def main() -> None:
    """Read all link files and write ``series.md`` and ``movies.md``."""
    for link in _iter_links(SERIES_GLOB):
        process_series(link)
    for link in _iter_links(MOVIES_GLOB):
        process_movies(link)

    folder.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the pages contain Persian text, which the platform's
    # default encoding may not be able to represent.
    (folder / "series.md").write_text(build_series_markdown(), encoding="utf-8")
    (folder / "movies.md").write_text(build_movies_markdown(), encoding="utf-8")


if __name__ == "__main__":
    main()