fix(downloads): prefer library metadata for author_title filenames with fallback stems
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
"""Obtains AAX files from Audible (cache or download) and provides activation bytes."""
|
"""Obtains AAX files from Audible (cache or download) and provides activation bytes."""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import unicodedata
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
@@ -33,26 +34,31 @@ class DownloadManager:
|
|||||||
self.cache_dir = cache_dir
|
self.cache_dir = cache_dir
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self._http_client = httpx.Client(
|
self._http_client = httpx.Client(auth=auth, timeout=30.0, follow_redirects=True)
|
||||||
auth=auth, timeout=30.0, follow_redirects=True)
|
|
||||||
self._download_client = httpx.Client(
|
self._download_client = httpx.Client(
|
||||||
timeout=httpx.Timeout(connect=30.0, read=None,
|
timeout=httpx.Timeout(connect=30.0, read=None, write=30.0, pool=30.0),
|
||||||
write=30.0, pool=30.0),
|
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_or_download(
|
def get_or_download(
|
||||||
self, asin: str, notify: StatusCallback | None = None
|
self,
|
||||||
|
asin: str,
|
||||||
|
notify: StatusCallback | None = None,
|
||||||
|
preferred_title: str | None = None,
|
||||||
|
preferred_author: str | None = None,
|
||||||
) -> Path | None:
|
) -> Path | None:
|
||||||
"""Return local path to AAX file; download and cache if not present."""
|
"""Return local path to AAX file; download and cache if not present."""
|
||||||
title = self._get_name_from_asin(asin) or asin
|
filename_stems = self._get_filename_stems_from_asin(
|
||||||
safe_title = self._sanitize_filename(title)
|
asin,
|
||||||
local_path = self.cache_dir / f"{safe_title}.aax"
|
preferred_title=preferred_title,
|
||||||
|
preferred_author=preferred_author,
|
||||||
if local_path.exists() and local_path.stat().st_size >= MIN_FILE_SIZE:
|
)
|
||||||
|
local_path = self.cache_dir / f"{filename_stems[0]}.aax"
|
||||||
|
cached_path = self._find_cached_path(filename_stems)
|
||||||
|
if cached_path:
|
||||||
if notify:
|
if notify:
|
||||||
notify(f"Using cached file: {local_path.name}")
|
notify(f"Using cached file: {cached_path.name}")
|
||||||
return local_path
|
return cached_path
|
||||||
|
|
||||||
if notify:
|
if notify:
|
||||||
notify(f"Downloading to {local_path.name}...")
|
notify(f"Downloading to {local_path.name}...")
|
||||||
@@ -92,12 +98,7 @@ class DownloadManager:
|
|||||||
|
|
||||||
def get_cached_path(self, asin: str) -> Path | None:
|
def get_cached_path(self, asin: str) -> Path | None:
|
||||||
"""Return path to cached AAX file if it exists and is valid size."""
|
"""Return path to cached AAX file if it exists and is valid size."""
|
||||||
title = self._get_name_from_asin(asin) or asin
|
return self._find_cached_path(self._get_filename_stems_from_asin(asin))
|
||||||
safe_title = self._sanitize_filename(title)
|
|
||||||
local_path = self.cache_dir / f"{safe_title}.aax"
|
|
||||||
if local_path.exists() and local_path.stat().st_size >= MIN_FILE_SIZE:
|
|
||||||
return local_path
|
|
||||||
return None
|
|
||||||
|
|
||||||
def is_cached(self, asin: str) -> bool:
|
def is_cached(self, asin: str) -> bool:
|
||||||
"""Return True if the title is present in cache with valid size."""
|
"""Return True if the title is present in cache with valid size."""
|
||||||
@@ -130,20 +131,68 @@ class DownloadManager:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def _sanitize_filename(self, filename: str) -> str:
|
def _sanitize_filename(self, filename: str) -> str:
|
||||||
"""Remove invalid characters from filename."""
|
"""Normalize a filename segment with ASCII letters, digits, and dashes."""
|
||||||
return re.sub(r'[<>:"/\\|?*]', "_", filename)
|
ascii_text = unicodedata.normalize("NFKD", filename)
|
||||||
|
ascii_text = ascii_text.encode("ascii", "ignore").decode("ascii")
|
||||||
|
ascii_text = re.sub(r"[’'`]+", "", ascii_text)
|
||||||
|
ascii_text = re.sub(r"[^A-Za-z0-9]+", "-", ascii_text)
|
||||||
|
ascii_text = re.sub(r"-+", "-", ascii_text)
|
||||||
|
ascii_text = ascii_text.strip("-._")
|
||||||
|
return ascii_text or "Unknown"
|
||||||
|
|
||||||
|
def _find_cached_path(self, filename_stems: list[str]) -> Path | None:
|
||||||
|
"""Return the first valid cached path matching any candidate filename stem."""
|
||||||
|
for filename_stem in filename_stems:
|
||||||
|
local_path = self.cache_dir / f"{filename_stem}.aax"
|
||||||
|
if local_path.exists() and local_path.stat().st_size >= MIN_FILE_SIZE:
|
||||||
|
return local_path
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_filename_stems_from_asin(
|
||||||
|
self,
|
||||||
|
asin: str,
|
||||||
|
preferred_title: str | None = None,
|
||||||
|
preferred_author: str | None = None,
|
||||||
|
) -> list[str]:
|
||||||
|
"""Build preferred and fallback cache filename stems for an ASIN."""
|
||||||
|
if preferred_title:
|
||||||
|
preferred_combined = (
|
||||||
|
f"{self._sanitize_filename(preferred_author or 'Unknown Author')}_"
|
||||||
|
f"{self._sanitize_filename(preferred_title)}"
|
||||||
|
)
|
||||||
|
preferred_legacy = self._sanitize_filename(preferred_title)
|
||||||
|
fallback_asin = self._sanitize_filename(asin)
|
||||||
|
return list(
|
||||||
|
dict.fromkeys([preferred_combined, preferred_legacy, fallback_asin])
|
||||||
|
)
|
||||||
|
|
||||||
def _get_name_from_asin(self, asin: str) -> str | None:
|
|
||||||
"""Get the title/name of a book from its ASIN."""
|
|
||||||
try:
|
try:
|
||||||
product_info = self.client.get(
|
product_info = self.client.get(
|
||||||
path=f"1.0/catalog/products/{asin}",
|
path=f"1.0/catalog/products/{asin}",
|
||||||
response_groups="product_desc,product_attrs",
|
**{"response_groups": "contributors,product_desc,product_attrs"},
|
||||||
)
|
)
|
||||||
product = product_info.get("product", {})
|
product = product_info.get("product", {})
|
||||||
return product.get("title") or "Unknown Title"
|
title = product.get("title") or "Unknown Title"
|
||||||
except (OSError, ValueError, KeyError):
|
author = self._get_primary_author(product)
|
||||||
return None
|
combined = (
|
||||||
|
f"{self._sanitize_filename(author)}_{self._sanitize_filename(title)}"
|
||||||
|
)
|
||||||
|
legacy_title = self._sanitize_filename(title)
|
||||||
|
fallback_asin = self._sanitize_filename(asin)
|
||||||
|
return list(dict.fromkeys([combined, legacy_title, fallback_asin]))
|
||||||
|
except (OSError, ValueError, KeyError, AttributeError):
|
||||||
|
return [self._sanitize_filename(asin)]
|
||||||
|
|
||||||
|
def _get_primary_author(self, product: dict) -> str:
|
||||||
|
"""Extract a primary author name from product metadata."""
|
||||||
|
contributors = product.get("authors") or product.get("contributors") or []
|
||||||
|
for contributor in contributors:
|
||||||
|
if not isinstance(contributor, dict):
|
||||||
|
continue
|
||||||
|
name = contributor.get("name")
|
||||||
|
if isinstance(name, str) and name.strip():
|
||||||
|
return name
|
||||||
|
return "Unknown Author"
|
||||||
|
|
||||||
def _get_download_link(
|
def _get_download_link(
|
||||||
self,
|
self,
|
||||||
@@ -174,7 +223,8 @@ class DownloadManager:
|
|||||||
if not link:
|
if not link:
|
||||||
link = str(response.url)
|
link = str(response.url)
|
||||||
|
|
||||||
tld = self.auth.locale.domain
|
locale = getattr(self.auth, "locale", None)
|
||||||
|
tld = getattr(locale, "domain", "com")
|
||||||
return link.replace("cds.audible.com", f"cds.audible.{tld}")
|
return link.replace("cds.audible.com", f"cds.audible.{tld}")
|
||||||
|
|
||||||
except httpx.HTTPError as exc:
|
except httpx.HTTPError as exc:
|
||||||
|
|||||||
Reference in New Issue
Block a user