diff options
Diffstat (limited to 'cip_paris_client/parsers.py')
-rw-r--r-- | cip_paris_client/parsers.py | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/cip_paris_client/parsers.py b/cip_paris_client/parsers.py
index e22de8e..34cc1ea 100644
--- a/cip_paris_client/parsers.py
+++ b/cip_paris_client/parsers.py
@@ -1,16 +1,13 @@
 from datetime import (
     date,
     datetime,
+    time,
 )
-from typing import TYPE_CHECKING
 
 from selectolax.parser import HTMLParser
 
 from cip_paris_client.schemas import Session
 
-if TYPE_CHECKING:
-    from typing import Final
-
 
 def is_released_next_year(
     current_month: int,
@@ -25,40 +22,51 @@ def is_released_next_year(
 
 
 def parse_date(date_str: str) -> date:
-    """Parse the CIP date from webpage."""
-    date_format: Final[str] = ""
+    """Parse date from the CIP webpage format."""
     day, month = map(int, date_str.split()[1].split("/"))
-
-    date = datetime.date
+    year = datetime.now().year
     if is_released_next_year(datetime.now().month, month):
-        print()
+        year += 1
+    return date(year=year, month=month, day=day)
 
 
-def parse_sessions(html: bytes) -> list[Session]:
-    """Parse movie sessions from an html webpage."""
-    movie_container_query: Final[str] = "div.movie-results-container"
-    movie_name_query: Final[str] = "div.desc h3"
-    sessions_container_query: Final[str] = "div.session-date > div.item"
-    session_url_selector: Final[str] = "a"
-    session_date_selector: Final[str] = "div.sessionDate"
-    session_time_selector: Final[str] = "div p.time"
+def parse_time(time_str: str) -> time:
+    """Parse the time from the CIP webpage format."""
+    hour, minute = map(int, time_str.split(":"))
+    return time(hour=hour, minute=minute, second=0)
+
+
+def parse_sessions(
+    html: bytes,
+    movie_container_query: str = "div.movie-results-container",
+    movie_name_query: str = "div.desc h3",
+    sessions_container_query: str = "div.session-date > div.item",
+    session_url_selector: str = "a",
+    session_date_selector: str = "div.sessionDate",
+    session_time_selector: str = "div p.time",
+) -> list[Session]:
+    """Parse movie sessions from an html webpage."""
 
     for movie_tree in HTMLParser(html).css(movie_container_query):
         movie_name = movie_tree.css_first(movie_name_query).text()
 
         for session_tree in movie_tree.css(sessions_container_query):
-            attributes = session_tree.css_first(session_url_selector).attributes
-            date: str = session_tree.css_first(session_date_selector).text()
-            time: str = session_tree.css_first(session_time_selector).text()
-
-            print(date)
-            print(time)
+            # Extract attributes first, conversion is done later.
+            attributes = session_tree.css_first(
+                session_url_selector
+            ).attributes
+            date_str: str = session_tree.css_first(
+                session_date_selector
+            ).text()
+            time_str: str = session_tree.css_first(
+                session_time_selector
+            ).text()
 
             yield Session(
                 url=attributes.get("href") or "",
                 cinema=attributes.get("title") or "",
                 movie=movie_name,
-                date=date,
-                time=time,
+                date=parse_date(date_str),
+                time=parse_time(time_str),
             )