Add indexing for movies
This commit is contained in:
@@ -1,23 +1,45 @@
|
|||||||
from glob import glob
|
from glob import glob
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
series_links_dict = {}
|
||||||
|
movies_links_dict = {}
|
||||||
|
|
||||||
links_dict = {}
|
folder = Path("view/almasmovie")
|
||||||
|
|
||||||
def process_series(link: str):
    """Record the series folder URL that *link* belongs to.

    The link's path is split on "/"; the element at index 2 is used as the
    grouping key (the index letter) and the first four elements form the
    series folder path. The resulting URL is appended to
    ``series_links_dict[key]`` unless it is already present.

    Lines whose path has fewer than three "/"-separated elements (for
    example blank lines in the input file) are ignored instead of raising
    ``IndexError``.
    """
    # Lines come from readlines(), so drop the trailing newline/whitespace
    # before it can leak into the last path segment.
    parsed = urlparse(link.strip())
    segments = parsed.path.split("/")
    if len(segments) < 3:
        return

    first_letter = segments[2]
    joined_path = "/".join(segments[:4])
    url = f"{parsed.scheme}://{parsed.hostname}{joined_path}"

    # setdefault creates the per-letter list on first use.
    urls = series_links_dict.setdefault(first_letter, [])
    if url not in urls:
        urls.append(url)
|
||||||
|
|
||||||
|
|
||||||
|
def process_movies(link: str):
    """Record one movie entry for the folder that *link* lives in.

    The last three "/"-separated elements of the link's path are taken as
    ``year / folder_name / file_name``. The first link seen for a given
    ``folder_name`` stores a tuple in ``movies_links_dict[folder_name]``:

        (display name, folder URL, year, folder_name)

    where the display name is the file name with "_" and "." replaced by
    spaces and the folder URL is the link without its last path element.
    Later links for the same folder are ignored.

    Lines whose path has fewer than three elements (for example blank lines)
    are skipped instead of raising ``IndexError``.
    """
    # Lines come from readlines(), so drop the trailing newline/whitespace
    # before it can leak into the file name.
    parsed = urlparse(link.strip())
    segments = parsed.path.split("/")
    if len(segments) < 3:
        return

    *_, year, folder_name, file_name = segments

    # Only the first file of each folder defines the entry.
    if folder_name in movies_links_dict:
        return

    name = file_name.replace("_", " ").replace(".", " ")
    parent_path = "/".join(segments[:-1])
    movies_links_dict[folder_name] = (
        name,
        f"{parsed.scheme}://{parsed.hostname}{parent_path}",
        year,
        folder_name,
    )
|
||||||
|
|
||||||
|
|
||||||
# Iterating series
|
# Iterating series
|
||||||
for file in glob("data/*.saymyname.website-series.txt"):
|
for file in glob("data/*.saymyname.website-series.txt"):
|
||||||
@@ -25,22 +47,49 @@ for file in glob("data/*.saymyname.website-series.txt"):
|
|||||||
links = fp.readlines()
|
links = fp.readlines()
|
||||||
|
|
||||||
for link in links:
|
for link in links:
|
||||||
if "saymyname" in link:
|
process_series(link)
|
||||||
parse_saymyname(link)
|
|
||||||
|
# Feed every link from the movie link files under data/ to process_movies.
for file in glob("data/*.saymyname.website-movies.txt"):
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(file, "r", encoding="utf-8") as fp:
        # Iterate the file directly; no need to hold all lines in a list.
        for link in fp:
            process_movies(link)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Build the Markdown index of series: a fixed header followed by one bullet
# per stored series URL, in the iteration order of series_links_dict.
series_lines = [
    "# آرشیو الماسمووی (فقط سریال) \n\n\n",
    "مرتب شده بر اساس حروف الفبا.\n\n",
    "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
    "---\n",
]
for k, v in series_links_dict.items():
    for link in v:
        # The last path element of the stored URL is shown as the link text.
        name = urlparse(link).path.split("/")[-1]
        series_lines.append(f"- **{k}**: [{name}]({link.replace(' ', '%20')})\n\n")

# Collect parts and join once instead of repeated string concatenation.
series_text = "".join(series_lines)

# The page contains Persian text, so write it as UTF-8 regardless of the
# platform's default encoding.
with open(folder / "series.md", "w", encoding="utf-8") as fp:
    fp.write(series_text)
|
||||||
|
|
||||||
|
# Build the Markdown index of movies: a fixed header followed by one bullet
# per entry of movies_links_dict, sorted by display name.
movies_lines = [
    # The heading needs its own line break; without it the next sentence is
    # rendered as part of the heading.
    "# آرشیو الماسمووی (فقط فیلم) \n\n\n",
    "مرتب شده بر اساس حروف الفبا.\n\n",
    "جهت جستجو از قابلیت `find in page` مرورگر خود استفاده کنید.\n\n",
    "---\n",
]

for v in sorted(movies_links_dict.values(), key=lambda x: x[0]):
    name = v[0]
    # name[:1] instead of name[0] so an empty name cannot raise IndexError.
    initial = name[:1]
    first_letter = "0-9" if initial.isnumeric() else initial
    url = v[1].replace(" ", "%20")
    year = v[2]

    entry = f"- **{first_letter}**: [{name}]({url})"

    # isdecimal() only accepts characters int() can parse; values of 3000
    # or more are not shown as a year.
    if year.isdecimal() and int(year) < 3000:
        entry += f" ({year})"

    movies_lines.append(entry + "\n\n")

# Collect parts and join once instead of repeated string concatenation.
movies_text = "".join(movies_lines)

# The page contains Persian text, so write it as UTF-8 regardless of the
# platform's default encoding.
with open(folder / "movies.md", "w", encoding="utf-8") as fp:
    fp.write(movies_text)
|
||||||
|
|||||||
13177
view/almasmovie/movies.md
Normal file
13177
view/almasmovie/movies.md
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user