Skip to content

_build_configs()

Class: JobsAustriaDetailsETL
File: jobs_austria_details_scraping.py · line 166

Route URLs by portal, then split each portal group into batches of URL_BATCH_SIZE. Returns one Apify config dict per batch.

Signature

Parameters urls
Returns list
Async No
Visibility Private

Implementation

def _build_configs(urls: list[str]) -> list[dict]:
    """
    Build Apify run configurations for the given URLs.

    The URLs are handed to ``PortalRouter.route``, which returns a mapping
    of portal -> URLs. Every group except the ``PortalRouter.UNKNOWN`` one
    is cut into consecutive slices of at most ``URL_BATCH_SIZE`` URLs, and
    each slice becomes one config dict of the form::

        {"actor_id": ACTOR_ID,
         "run_input": {"input": <portal input name>, "urls": [...]}}

    URLs grouped under ``PortalRouter.UNKNOWN`` are not submitted; only
    their count is logged.

    Args:
        urls: URLs to distribute across portals and batches.

    Returns:
        One config dict per batch, in the iteration order of the routed
        groups and, within a group, in the original URL order.
    """
    grouped = PortalRouter().route(urls)

    batches: list[dict] = []
    for portal_name, members in grouped.items():
        # Guard clause: unsupported portals produce no configs at all.
        if portal_name == PortalRouter.UNKNOWN:
            logs.info(f"Skipping {len(members)} URLs with unsupported portal.")
            continue

        # Looked up once per portal; shared by all of that portal's batches.
        input_name = PortalRouter._PORTAL_INPUTS[portal_name]
        batches.extend(
            {
                "actor_id": ACTOR_ID,
                "run_input": {
                    "input": input_name,
                    "urls": members[start:start + URL_BATCH_SIZE],
                },
            }
            for start in range(0, len(members), URL_BATCH_SIZE)
        )
    return batches