from glob import glob
from urllib.parse import urlparse

# Maps a grouping key (the third path segment, e.g. the series' first letter)
# to the list of unique series URLs found under it.
links_dict = {}


def parse_saymyname(link: str):
    parsed = urlparse(link)
    splitted_path = parsed.path.split("/")
    first_letter = splitted_path[2]
    # Create the bucket for this key on first use.
    if first_letter not in links_dict:
        links_dict[first_letter] = []
    # Keep only the first path segments so every episode link collapses
    # to its series page, then deduplicate.
    joined_path = "/".join(splitted_path[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"
    if url not in links_dict[first_letter]:
        links_dict[first_letter].append(url)


# Iterate over the exported series link files.
for file in glob("data/*.saymyname.website-series.txt"):
    with open(file, "r") as fp:
        links = fp.readlines()
    for link in links:
        link = link.strip()  # drop the trailing newline before parsing
        if "saymyname" in link:
            parse_saymyname(link)

# Build the markdown index, one entry per unique series URL.
text = "# Almas Movie Archive (Only Series)\n\n"
for k, v in links_dict.items():
    print(f"{k}: {len(v)}")
    for link in v:
        parsed_link = urlparse(link)
        name = parsed_link.path.split("/")[-1]
        # Percent-encode spaces so the markdown link stays valid.
        text += f"{k}: [{name}]({link.replace(' ', '%20')})\n\n"

with open("series.md", "w") as fp:
    fp.write(text)