summary | refs | log | tree | commit | diff | stats
path: root/cip_paris_client/parsers.py
diff options
context:
space:
mode:
Diffstat (limited to 'cip_paris_client/parsers.py')
-rw-r--r-- cip_paris_client/parsers.py 58
1 files changed, 33 insertions, 25 deletions
diff --git a/cip_paris_client/parsers.py b/cip_paris_client/parsers.py
index e22de8e..34cc1ea 100644
--- a/cip_paris_client/parsers.py
+++ b/cip_paris_client/parsers.py
@@ -1,16 +1,13 @@
from datetime import (
date,
datetime,
+ time,
)
-from typing import TYPE_CHECKING
from selectolax.parser import HTMLParser
from cip_paris_client.schemas import Session
-if TYPE_CHECKING:
- from typing import Final
-
def is_released_next_year(
current_month: int,
@@ -25,40 +22,51 @@ def is_released_next_year(
def parse_date(date_str: str) -> date:
- """Parse the CIP date from webpage."""
- date_format: Final[str] = ""
+ """Parse date from the CIP webpage format."""
day, month = map(int, date_str.split()[1].split("/"))
-
- date = datetime.date
+ year = datetime.now().year
if is_released_next_year(datetime.now().month, month):
- print()
+ year += 1
+ return date(year=year, month=month, day=day)
-def parse_sessions(html: bytes) -> list[Session]:
- """Parse movie sessions from an html webpage."""
- movie_container_query: Final[str] = "div.movie-results-container"
- movie_name_query: Final[str] = "div.desc h3"
- sessions_container_query: Final[str] = "div.session-date > div.item"
- session_url_selector: Final[str] = "a"
- session_date_selector: Final[str] = "div.sessionDate"
- session_time_selector: Final[str] = "div p.time"
+def parse_time(time_str: str) -> time:
+ """Parse the time from the CIP webpage format."""
+ hour, minute = map(int, time_str.split(":"))
+ return time(hour=hour, minute=minute, second=0)
+
+
+def parse_sessions(
+ html: bytes,
+ movie_container_query: str = "div.movie-results-container",
+ movie_name_query: str = "div.desc h3",
+ sessions_container_query: str = "div.session-date > div.item",
+ session_url_selector: str = "a",
+ session_date_selector: str = "div.sessionDate",
+ session_time_selector: str = "div p.time",
+) -> list[Session]:
+ """Parse movie sessions from an html webpage."""
for movie_tree in HTMLParser(html).css(movie_container_query):
movie_name = movie_tree.css_first(movie_name_query).text()
for session_tree in movie_tree.css(sessions_container_query):
- attributes = session_tree.css_first(session_url_selector).attributes
- date: str = session_tree.css_first(session_date_selector).text()
- time: str = session_tree.css_first(session_time_selector).text()
-
- print(date)
- print(time)
+ # Extract attributes first, conversion is done later.
+ attributes = session_tree.css_first(
+ session_url_selector
+ ).attributes
+ date_str: str = session_tree.css_first(
+ session_date_selector
+ ).text()
+ time_str: str = session_tree.css_first(
+ session_time_selector
+ ).text()
yield Session(
url=attributes.get("href") or "",
cinema=attributes.get("title") or "",
movie=movie_name,
- date=date,
- time=time,
+ date=parse_date(date_str),
+ time=parse_time(time_str),
)
remember that computers suck.