torah_dl

ContentExtractionError

Bases: ExtractionError

Raised when required content cannot be extracted from the page.

Source code in src/torah_dl/core/exceptions.py

class ContentExtractionError(ExtractionError):
    """Signals that required content could not be extracted from the page."""

DownloadError

Bases: TorahDLError

Raised when there are issues during the download process.

Source code in src/torah_dl/core/exceptions.py

class DownloadError(TorahDLError):
    """Signals a problem encountered during the download process."""

DownloadURLError

Bases: ContentExtractionError

Raised when the download URL cannot be found or extracted.

Source code in src/torah_dl/core/exceptions.py

class DownloadURLError(ContentExtractionError):
    """Signals that the download URL could not be found or extracted."""

Extraction

Bases: BaseModel

Represents the extracted data from a source.

Source code in src/torah_dl/core/models.py

class Extraction(BaseModel):
    """Represents the extracted data from a source.

    Only ``download_url`` is required; every other field defaults to ``None``.
    """

    # Optional title; None when not provided.
    title: str | None = None
    # Required: the only field declared without a default.
    download_url: str
    # Optional file format; None when not provided.
    file_format: str | None = None
    # Optional file name; None when not provided.
    file_name: str | None = None

ExtractionError

Bases: TorahDLError

Base class for all extraction-related errors.

Source code in src/torah_dl/core/exceptions.py

class ExtractionError(TorahDLError):
    """Common base class for every extraction-related error."""

ExtractorNotFoundError

Bases: ExtractionError

Raised when no extractor is found for a given URL.

Source code in src/torah_dl/core/exceptions.py

class ExtractorNotFoundError(ExtractionError):
    """Signals that no extractor was found for a given URL."""

NetworkError

Bases: ExtractionError

Raised when there are network-related issues during content extraction.

Source code in src/torah_dl/core/exceptions.py

class NetworkError(ExtractionError):
    """Signals a network-related issue during content extraction."""

TitleExtractionError

Bases: ContentExtractionError

Raised when the title cannot be found or decoded.

Source code in src/torah_dl/core/exceptions.py

class TitleExtractionError(ContentExtractionError):
    """Signals that the title could not be found or decoded."""

TorahDLError

Bases: Exception

Base exception class for all torah-dl errors.

Source code in src/torah_dl/core/exceptions.py

class TorahDLError(Exception):
    """Root of the torah-dl exception hierarchy; all torah-dl errors derive from it."""

can_handle

can_handle(url: str) -> bool

Checks if a given URL can be handled by any extractor.

Source code in src/torah_dl/core/extract.py

def can_handle(url: str) -> bool:
    """Report whether at least one registered extractor accepts the URL.

    Args:
        url: The URL to check against each extractor in ``EXTRACTORS``.

    Returns:
        ``True`` as soon as one extractor's ``can_handle`` is truthy,
        ``False`` if none is.
    """
    for candidate in EXTRACTORS:
        if candidate.can_handle(url):
            return True
    return False

download

download(url: str, output_path: Path, timeout: int = 30)

Download a file from a given URL and save it to the specified output path.

Parameters:

Name	Type	Description	Default
`url`	`str`	The URL to download from	required
`output_path`	`Path`	The path to save the downloaded file to	required
`timeout`	`int`	The timeout for the request, in seconds	`30`

Source code in src/torah_dl/core/download.py

def download(url: str, output_path: Path, timeout: int = 30):
    """Fetch the resource at ``url`` and write its bytes to ``output_path``.

    Args:
        url: The URL to download from
        output_path: The path to save the downloaded file to
        timeout: The timeout passed to the HTTP request

    Raises:
        DownloadError: If the request fails or the response has an error
            status; the underlying ``requests`` exception is chained as
            the cause.
    """
    try:
        reply = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses into exceptions so they are wrapped below.
        reply.raise_for_status()
    except requests.RequestException as err:
        raise DownloadError(url) from err
    else:
        # The output file is only opened once the request has succeeded.
        with open(output_path, "wb") as sink:
            sink.write(reply.content)

extract

extract(url: str) -> Extraction

Extracts the download URL, title, and file format from a given URL.

Source code in src/torah_dl/core/extract.py

def extract(url: str) -> Extraction:
    """Run the first extractor that accepts ``url`` and return its result.

    Args:
        url: The URL to extract from.

    Returns:
        The ``Extraction`` produced by the first extractor in ``EXTRACTORS``
        whose ``can_handle`` accepts the URL.

    Raises:
        ExtractorNotFoundError: If no extractor accepts the URL.
    """
    # The lazy generator stops probing at the first extractor that matches.
    handler = next(
        (candidate for candidate in EXTRACTORS if candidate.can_handle(url)),
        None,
    )
    if handler is None:
        raise ExtractorNotFoundError(url)
    return handler.extract(url)

list_extractors

list_extractors() -> dict[str, str]

List all available extractors.

Source code in src/torah_dl/core/list.py

def list_extractors() -> dict[str, str]:
    """Build a mapping of every available extractor's name to its homepage.

    Returns:
        A dict keyed by each extractor's ``name`` with its ``homepage`` as
        the value, in ``EXTRACTORS`` order.
    """
    catalog: dict[str, str] = {}
    for entry in EXTRACTORS:
        label = entry.name
        catalog[label] = entry.homepage
    return catalog