summary | refs | log | tree | commit | diff | stats
path: root/cip_paris_client/parsers.py
diff options
context:
space:
mode:
Diffstat (limited to 'cip_paris_client/parsers.py')
-rw-r--r-- cip_paris_client/parsers.py 64
1 files changed, 64 insertions, 0 deletions
diff --git a/cip_paris_client/parsers.py b/cip_paris_client/parsers.py
new file mode 100644
index 0000000..e22de8e
--- /dev/null
+++ b/cip_paris_client/parsers.py
@@ -0,0 +1,64 @@
+from datetime import (
+ date,
+ datetime,
+)
+from typing import TYPE_CHECKING
+
+from selectolax.parser import HTMLParser
+
+from cip_paris_client.schemas import Session
+
+if TYPE_CHECKING:
+ from typing import Final
+
+
def is_released_next_year(
    current_month: int,
    target_month: int,
    month_next_year_max: int = 9,
    month_next_year_min: int = 3,
) -> bool:
    """Tell whether ``target_month`` should be read as a month of next year.

    The answer is ``True`` only when both conditions hold:

    * ``current_month`` is at or past ``month_next_year_max``, and
    * ``target_month`` is at or before ``month_next_year_min``.

    Args:
        current_month: Month number of the reference date.
        target_month: Month number being classified.
        month_next_year_max: Lowest ``current_month`` for which a rollover
            is considered (default 9).
        month_next_year_min: Highest ``target_month`` that is treated as
            belonging to the following year (default 3).

    Returns:
        ``True`` when both thresholds are met, ``False`` otherwise.
    """
    # Too early in the year: no rollover regardless of the target month.
    if current_month < month_next_year_max:
        return False
    return target_month <= month_next_year_min
+
+
def parse_date(date_str: str) -> date:
    """Parse the CIP date from webpage.

    The second whitespace-separated token of ``date_str`` is read as
    ``day/month`` (two integers separated by a slash). The page text carries
    no year, so the current year is used, moved forward by one when
    ``is_released_next_year`` reports that the month belongs to next year.

    Args:
        date_str: Raw date text; its second token must be ``day/month``.

    Returns:
        The calendar date of the session.

    Raises:
        IndexError: If ``date_str`` has fewer than two whitespace-separated
            tokens.
        ValueError: If the second token is not two integers separated by
            ``/``, or if they do not form a valid calendar date.
    """
    day, month = map(int, date_str.split()[1].split("/"))

    # Read the clock once so month and year come from the same instant.
    today = datetime.now()
    year = today.year
    if is_released_next_year(today.month, month):
        year += 1

    return date(year, month, day)
+
+
def parse_sessions(html: bytes) -> list[Session]:
    """Parse movie sessions from an html webpage.

    Each ``div.movie-results-container`` element is treated as one movie.
    Its title comes from ``div.desc h3``, and every
    ``div.session-date > div.item`` inside it becomes one ``Session``.

    Movies without a title node, and sessions missing their link, date or
    time node, are skipped.

    Args:
        html: Raw HTML of the page.

    Returns:
        The sessions found on the page, in document order. The list is empty
        when nothing matches.
    """
    movie_container_query: Final[str] = "div.movie-results-container"
    movie_name_query: Final[str] = "div.desc h3"
    sessions_container_query: Final[str] = "div.session-date > div.item"
    session_url_selector: Final[str] = "a"
    session_date_selector: Final[str] = "div.sessionDate"
    session_time_selector: Final[str] = "div p.time"

    sessions: list[Session] = []

    for movie_tree in HTMLParser(html).css(movie_container_query):
        title_node = movie_tree.css_first(movie_name_query)
        if title_node is None:
            # css_first returns None when nothing matches; no title, no movie.
            continue
        movie_name = title_node.text()

        for session_tree in movie_tree.css(sessions_container_query):
            link_node = session_tree.css_first(session_url_selector)
            date_node = session_tree.css_first(session_date_selector)
            time_node = session_tree.css_first(session_time_selector)
            if link_node is None or date_node is None or time_node is None:
                continue

            attributes = link_node.attributes
            sessions.append(
                Session(
                    # Attribute values may be None or missing; use "" then.
                    url=attributes.get("href") or "",
                    cinema=attributes.get("title") or "",
                    movie=movie_name,
                    date=date_node.text(),
                    time=time_node.text(),
                )
            )

    return sessions
remember that computers suck.