Skip to content

wright-way Prefect Flows#

The Wright Way project is a scraper that monitors the Wright Way Rescue website for new animals. The project has two main Prefect flows: one that initializes the database, and one that monitors the website.

wright_way.initialize #

initialize(*, engine: Engine | None = None, client: WrightWayScraper | None = None) -> None

Initialize and populate the Wright Way database.

This flow only initializes the database and populates it with all the current animals. The expectation is that this flow will be run once, before the main monitor flow starts running on a regular basis. This ensures that the first run of the monitor flow will not spam the Slack channel with hundreds of new animals.

Parameters:

Name Type Description Default
engine Engine | None

SQLAlchemy engine, if not provided, the default engine will be used. Defaults to None.

None
client WrightWayScraper | None

Wright Way Scraper client, if not provided, a new client will be created. Defaults to None.

None
Source code in wright_way/orchestration.py
@flow(  # pyright: ignore[reportCallIssue]
    name="wright-way-initialize",
    task_runner=ThreadPoolTaskRunner(max_workers=8),  # pyright: ignore[reportArgumentType]
    on_failure=[SLACK_ASSISTANT.flow_status_hook()],
    on_crashed=[SLACK_ASSISTANT.flow_status_hook()],
)
def initialize(
    *,
    engine: Engine | None = None,
    client: WrightWayScraper | None = None,
) -> None:
    """Initialize and populate the Wright Way database.

    This flow only initializes the database and populates it with all the animals
    currently listed. It is meant to be run once, before the monitor flow starts
    running regularly, so that the first monitor run does not spam the Slack channel
    with hundreds of "new" animals.

    If the database already contains any Wright Way IDs, the flow logs that fact and
    returns without scraping anything.

    Args:
        engine (Engine | None, optional): SQLAlchemy engine, if not provided, the
            default engine will be used. Defaults to None.
        client (WrightWayScraper | None, optional): Wright Way Scraper client, if not
            provided, a new client will be created. Defaults to None.

    Raises:
        TypeError: If the IDs read from the database are not a ``Sequence``, or the
            IDs scraped from the website are not a ``list``.
    """
    enable_loguru_support()
    logger.info("Initializing Wright Way Animal Monitor Database")

    # Resolve the engine *before* touching the database so that schema creation and
    # every later query go through the same engine (same ordering as the monitor
    # flow). Previously ``initialize_db`` received the raw, possibly-None argument.
    engine = resolve_engine(engine)
    client = WrightWayScraper() if client is None else client

    initialize_db(engine=engine)

    db_wright_way_ids = get_wright_way_db_ids(engine=engine)
    # NOTE(review): presumably this runtime check exists to narrow the helper's
    # return type for the type checker -- confirm against its definition.
    if not isinstance(db_wright_way_ids, Sequence):
        msg = f"Expected Sequence[int], not {type(db_wright_way_ids)}"
        raise TypeError(msg)
    if set(db_wright_way_ids):
        logger.info("Database already initialized")
        return

    to_add_ids = get_wright_way_ids(client=client)
    if not isinstance(to_add_ids, list):
        msg = f"Expected list, not {type(to_add_ids)}"
        raise TypeError(msg)
    to_add_ids = set(to_add_ids)  # de-duplicate before scraping
    if not to_add_ids:
        return

    # Scrape every animal concurrently on the flow's task runner, then block until
    # all submissions have finished. One future is created per ID, so the list is
    # non-empty here and needs no further length check.
    animals = [get_animal.submit(id_=id_, client=client) for id_ in to_add_ids]
    wait(animals)
    _animals = _filter_missing_profile_animals([animal.result() for animal in animals])
    add_animals_to_db(animals=_animals, engine=engine)

wright_way.monitor #

monitor(*, full_refresh: bool = False, slack_channel: str | None = None, engine: Engine | None = None, client: WrightWayScraper | None = None) -> None

Wright Way Animal Monitor main logic.

This function is the main entry point for the Wright Way Animal Monitor. It will:

  1. Initialize the database, if not already initialized.
  2. Pull all the Wright Way Petango IDs from the database and from the Wright Way website.
  3. Scrape new animals from the Wright Way website and add them to the database.
  4. Update existing animals in the database (if requested).
  5. Delete animals from the database that are no longer on the Wright Way website.

Parameters:

Name Type Description Default
full_refresh bool

Update all animals in the database, instead of just the new ones. Defaults to False.

False
slack_channel str | None

Slack channel to publish new animals to. If None, no Slack alerts are sent. Defaults to None.

None
engine Engine | None

SQLAlchemy engine, if not provided, the default engine will be used. Defaults to None.

None
client WrightWayScraper | None

Wright Way Scraper client, if not provided, a new client will be created. Defaults to None.

None
Source code in wright_way/orchestration.py
@flow(  # pyright: ignore[reportCallIssue]
    name="wright-way-animal-monitor",
    task_runner=ThreadPoolTaskRunner(max_workers=8),  # pyright: ignore[reportArgumentType]
    on_failure=[SLACK_ASSISTANT.flow_status_hook()],
    on_crashed=[SLACK_ASSISTANT.flow_status_hook()],
)
def monitor(
    *,
    full_refresh: bool = False,
    slack_channel: str | None = None,
    engine: Engine | None = None,
    client: WrightWayScraper | None = None,
) -> None:
    """Run one pass of the Wright Way Animal Monitor.

    Steps performed, in order:

    1. Initialize the database, if not already initialized.
    2. Read the Wright Way Petango IDs stored in the database and the IDs currently
        listed on the Wright Way website.
    3. Delete animals from the database whose IDs are no longer on the website.
    4. Scrape animals whose IDs are on the website but not in the database, add them
        to the database and, when a Slack channel is given, alert on the subset kept
        by the puppies-only filter.
    5. When ``full_refresh`` is set, re-scrape and update the animals that were
        already in the database and are still on the website.

    Args:
        full_refresh (bool, optional): Update all animals in the database, instead of
            just the new ones. Defaults to False.
        slack_channel (str | None, optional): Slack channel to publish new animals
            to. When None, no alerts are sent. Defaults to None.
        engine (Engine | None, optional): SQLAlchemy engine, if not provided, the
            default engine will be used. Defaults to None.
        client (WrightWayScraper | None, optional): Wright Way Scraper client, if not
            provided, a new client will be created. Defaults to None.

    Raises:
        TypeError: If the IDs read from the database are not a ``Sequence``, or the
            IDs scraped from the website are not a ``list``.
    """
    enable_loguru_support()
    logger.info("Starting Wright Way Animal Monitor")
    logger.info(
        "Performing full refresh" if full_refresh else "Performing partial refresh",
    )

    engine = resolve_engine(engine)
    if client is None:
        client = WrightWayScraper()

    initialize_db(engine=engine)

    # IDs already stored locally.
    stored_ids = get_wright_way_db_ids(engine=engine)
    if not isinstance(stored_ids, Sequence):
        msg = f"Expected Sequence[int], not {type(stored_ids)}"
        raise TypeError(msg)
    stored_ids = set(stored_ids)

    # IDs currently listed on the website.
    listed_ids = get_wright_way_ids(client=client)
    if not isinstance(listed_ids, list):
        msg = f"Expected list, not {type(listed_ids)}"
        raise TypeError(msg)
    listed_ids = set(listed_ids)

    # In the database but gone from the website.
    to_delete = stored_ids - listed_ids
    if to_delete:
        delete_animals_from_db(petango_ids=to_delete, engine=engine)

    # On the website but not yet in the database.
    to_add_ids = listed_ids - stored_ids
    if to_add_ids:
        scrape_futures = [
            get_animal.submit(id_=id_, client=client) for id_ in to_add_ids
        ]
        wait(scrape_futures)
        if scrape_futures:
            new_animals = _filter_missing_profile_animals(
                [future.result() for future in scrape_futures],
            )
            add_animals_to_db(animals=new_animals, engine=engine)
            if slack_channel is not None:
                alert_futures = [
                    alert_new_animal.submit(
                        animal=puppy,
                        channel_name_or_id=slack_channel,
                        engine=engine,
                    )
                    for puppy in _filter_puppies_only(new_animals)
                ]
                wait(alert_futures)

    if not full_refresh:
        return

    # Everything on the website that was neither just added nor just deleted.
    to_update_ids = listed_ids - to_add_ids - to_delete
    if not to_update_ids:
        return

    refresh_futures = [
        get_animal.submit(id_=id_, client=client) for id_ in to_update_ids
    ]
    wait(refresh_futures)
    # NOTE(review): unlike the add path, results here are not passed through
    # _filter_missing_profile_animals -- confirm that is intentional.
    update_animals_in_db(
        animals=[future.result() for future in refresh_futures],
        engine=engine,
    )