from collections.abc import Iterator
from datetime import (
    date,
    datetime,
    time,
)

from selectolax.parser import HTMLParser

from cip_paris_client.schemas import Session


def is_released_next_year(
    current_month: int,
    target_month: int,
    month_next_year_max: int = 9,
    month_next_year_min: int = 3,
) -> bool:
    """Tell whether a target month should be read as belonging to next year.

    Returns True when ``current_month`` is at or past ``month_next_year_max``
    and ``target_month`` is at or before ``month_next_year_min``; with the
    defaults, a September-December "now" combined with a January-March
    target.
    """
    return (
        current_month >= month_next_year_max
        and target_month <= month_next_year_min
    )


def parse_date(date_str: str) -> date:
    """Parse date from the CIP webpage format.

    The string is split on whitespace and its second token is read as
    ``DD/MM``. The page carries no year, so the current year is used and
    bumped by one when ``is_released_next_year`` says so.

    Raises:
        IndexError: if ``date_str`` has fewer than two whitespace-separated
            tokens.
        ValueError: if the second token is not two ``/``-separated integers
            or does not form a valid calendar date.
    """
    day, month = map(int, date_str.split()[1].split("/"))
    # Take a single snapshot of "now" so the year and the month used for the
    # rollover decision always come from the same instant.
    today = datetime.now()
    year = today.year
    if is_released_next_year(today.month, month):
        year += 1
    return date(year=year, month=month, day=day)


def parse_time(time_str: str) -> time:
    """Parse the time from the CIP webpage format.

    Expects exactly two ``:``-separated integers (hour and minute); the
    seconds are set to zero.

    Raises:
        ValueError: if the string does not split into two integers or the
            values are out of range for ``datetime.time``.
    """
    hour, minute = map(int, time_str.split(":"))
    return time(hour=hour, minute=minute, second=0)


def parse_sessions(
    html: bytes,
    movie_container_query: str = "div.movie-results-container",
    movie_name_query: str = "div.desc h3",
    sessions_container_query: str = "div.session-date > div.item",
    session_url_selector: str = "a",
    session_date_selector: str = "div.sessionDate",
    session_time_selector: str = "div p.time",
) -> Iterator[Session]:
    """Parse movie sessions from an html webpage.

    This is a generator: sessions are yielded lazily, one per node matched
    by ``sessions_container_query`` inside each movie container. Wrap the
    call in ``list(...)`` if a materialized list is needed.

    Args:
        html: Raw page content handed to ``HTMLParser``.
        movie_container_query: CSS query selecting one node per movie.
        movie_name_query: CSS query for the movie title inside a container.
        sessions_container_query: CSS query selecting one node per session.
        session_url_selector: CSS query for the node whose ``href`` and
            ``title`` attributes provide the session url and cinema name.
        session_date_selector: CSS query for the node holding the date text.
        session_time_selector: CSS query for the node holding the time text.

    Yields:
        One ``Session`` per session node; ``url`` and ``cinema`` fall back
        to an empty string when the attribute is missing or empty.

    Raises:
        AttributeError: presumably when a required node is absent, since
            ``css_first`` is expected to return ``None`` on no match
            (confirm against the selectolax documentation).
        IndexError, ValueError: propagated from ``parse_date`` and
            ``parse_time`` on unexpected text formats.
    """
    for movie_tree in HTMLParser(html).css(movie_container_query):
        movie_name = movie_tree.css_first(movie_name_query).text()
        for session_tree in movie_tree.css(sessions_container_query):
            # Extract attributes first, conversion is done later.
            attributes = session_tree.css_first(
                session_url_selector
            ).attributes
            date_str: str = session_tree.css_first(
                session_date_selector
            ).text()
            time_str: str = session_tree.css_first(
                session_time_selector
            ).text()
            yield Session(
                url=attributes.get("href") or "",
                cinema=attributes.get("title") or "",
                movie=movie_name,
                date=parse_date(date_str),
                time=parse_time(time_str),
            )