diff options
Diffstat (limited to 'cip_paris_client/parsers.py')
-rw-r--r-- | cip_paris_client/parsers.py | 64 |
1 files changed, 64 insertions, 0 deletions
"""HTML parsing helpers for the ``cip_paris_client`` package."""

from collections.abc import Iterator
from datetime import (
    date,
    datetime,
)
from typing import TYPE_CHECKING

from selectolax.parser import HTMLParser

from cip_paris_client.schemas import Session

if TYPE_CHECKING:
    from typing import Final


def is_released_next_year(
    current_month: int,
    target_month: int,
    month_next_year_max: int = 9,
    month_next_year_min: int = 3,
) -> bool:
    """Tell whether a year-less target month belongs to the next year.

    The answer is ``True`` only when the current month is late enough in
    the year and the target month is early enough in the year.

    Args:
        current_month: Month number of "today".
        target_month: Month number of the date being resolved.
        month_next_year_max: Lowest ``current_month`` for which a rollover
            to the next year is considered (inclusive).
        month_next_year_min: Highest ``target_month`` that is treated as
            belonging to the next year (inclusive).

    Returns:
        ``True`` if ``current_month >= month_next_year_max`` and
        ``target_month <= month_next_year_min``, ``False`` otherwise.
    """
    return (
        current_month >= month_next_year_max
        and target_month <= month_next_year_min
    )


def parse_date(date_str: str) -> date:
    """Parse the CIP date from webpage.

    The second whitespace-separated token of ``date_str`` is read as
    ``day/month``. The page gives no year, so the current year is used
    unless :func:`is_released_next_year` reports that the month falls in
    the next one.

    Args:
        date_str: Text whose second token has the form ``DD/MM``.

    Returns:
        The resolved calendar date.

    Raises:
        IndexError: If ``date_str`` has fewer than two tokens.
        ValueError: If the token is not two ``/``-separated integers or
            does not form a valid date.
    """
    day, month = map(int, date_str.split()[1].split("/"))

    now = datetime.now()
    year = now.year
    if is_released_next_year(now.month, month):
        year += 1

    return date(year, month, day)


def parse_sessions(html: bytes) -> Iterator[Session]:
    """Parse movie sessions from an html webpage.

    This is a generator: sessions are yielded one at a time, in document
    order, while the parsed tree is walked.

    Args:
        html: Raw HTML of the page listing movies and their sessions.

    Yields:
        One ``Session`` per session item found under each movie container.
        ``url`` and ``cinema`` fall back to ``""`` when the link has no
        ``href`` / ``title`` attribute (or an empty one).
    """
    # Local annotations are never evaluated at runtime, so ``Final`` being
    # imported only under ``TYPE_CHECKING`` is safe here.
    movie_container_query: Final[str] = "div.movie-results-container"
    movie_name_query: Final[str] = "div.desc h3"
    sessions_container_query: Final[str] = "div.session-date > div.item"
    session_url_selector: Final[str] = "a"
    session_date_selector: Final[str] = "div.sessionDate"
    session_time_selector: Final[str] = "div p.time"

    # NOTE(review): the ``css_first`` results are used without a check;
    # presumably a selector that matches nothing yields ``None`` and the
    # attribute access then raises. Confirm against the selectolax docs.
    for movie_tree in HTMLParser(html).css(movie_container_query):
        movie_name = movie_tree.css_first(movie_name_query).text()

        for session_tree in movie_tree.css(sessions_container_query):
            link = session_tree.css_first(session_url_selector)
            attributes = link.attributes
            # Named ``session_*`` so the imported ``date`` class is not
            # shadowed inside this function.
            session_date: str = session_tree.css_first(
                session_date_selector
            ).text()
            session_time: str = session_tree.css_first(
                session_time_selector
            ).text()

            yield Session(
                url=attributes.get("href") or "",
                cinema=attributes.get("title") or "",
                movie=movie_name,
                date=session_date,
                time=session_time,
            )