"""Generate the AlmasMovie series archive page (Markdown) from link lists.

Reads every file matching ``data/*.saymyname.website-series.txt``, reduces
each link containing "saymyname" to scheme + host + its first three path
segments, groups the resulting URLs by the third path component, and writes
a Markdown list to ``view/almasmovie/series.md``.
"""
from glob import glob
from urllib.parse import urlparse

INPUT_PATTERN = "data/*.saymyname.website-series.txt"
OUTPUT_PATH = "view/almasmovie/series.md"

# Group key (third component of the split URL path) -> de-duplicated list of
# truncated URLs, in the order they were first seen.
links_dict = {}


def parse_saymyname(link: str):
    """Record the truncated form of *link* in ``links_dict``.

    The URL is cut down to ``scheme://hostname`` plus the first three path
    segments and stored under the key ``path.split("/")[2]`` (named
    ``first_letter`` here; presumably the series' initial -- confirm against
    the site's directory layout). URLs already recorded under that key are
    not added again.

    Args:
        link: A raw line from an input file; surrounding whitespace
            (including the trailing newline) is ignored.

    Returns:
        None. Links whose path is too short to contain a group segment are
        skipped instead of raising ``IndexError``.
    """
    # Lines read from a file keep their newline; strip it so it can never
    # end up inside the stored URL or the displayed name.
    parsed = urlparse(link.strip())
    segments = parsed.path.split("/")
    if len(segments) < 3:
        # e.g. "https://host/" -> ['', '']: there is no group segment.
        return

    first_letter = segments[2]
    # segments[0] is the empty string before the leading "/", so the first
    # four entries correspond to three real path segments.
    joined_path = "/".join(segments[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"

    bucket = links_dict.setdefault(first_letter, [])
    if url not in bucket:
        bucket.append(url)


def build_markdown(links: dict) -> str:
    """Render grouped URLs as the Markdown archive page.

    Args:
        links: Mapping of group key to a list of URLs, shaped like
            ``links_dict``.

    Returns:
        The full page text. Groups are emitted in sorted key order so the
        page matches its own "sorted alphabetically" header; URLs inside a
        group keep their stored order.
    """
    parts = [
        "# آرشیو الماس‌مووی (فقط سریال) \n\n\n",
        "مرتب شده بر اساس حروف الفبا.\n\n",
        "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
        "---\n",
    ]
    for key in sorted(links):
        for link in links[key]:
            # The last path segment is used as the link text.
            name = urlparse(link).path.split("/")[-1]
            # Spaces would terminate the Markdown link target early.
            parts.append(
                f"- **{key}**: [{name}]({link.replace(' ', '%20')})\n\n"
            )
    return "".join(parts)


def main() -> None:
    """Read all input files, print per-group counts, and write the page."""
    # Iterating series
    # glob() returns paths in arbitrary order; sort for reproducible output.
    for path in sorted(glob(INPUT_PATTERN)):
        with open(path, "r", encoding="utf-8") as fp:
            for link in fp:
                if "saymyname" in link:
                    parse_saymyname(link)

    for key in sorted(links_dict):
        print(f"{key}: {len(links_dict[key])}")

    # Explicit UTF-8: the page contains Persian text, which cannot be
    # encoded by many non-UTF-8 locale defaults.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as fp:
        fp.write(build_markdown(links_dict))


if __name__ == "__main__":
    main()