Skip to content

_fetch_pending_urls()

async Class: JobsAustriaDetailsETL
File: jobs_austria_details_scraping.py · line 153

Fetch URLs of jobs that haven't had details extracted yet.

Signature

Parameters none
Returns list[str]
Async Yes
Visibility Private

Implementation

async def _fetch_pending_urls(self) -> list[str]:
    """Return the URLs of jobs that are still waiting for detail extraction.

    A job is treated as pending when its ``order_number`` column is NULL;
    rows whose ``url`` is NULL are excluded by the query itself.

    Returns:
        The ``url`` value of every matching row in the ``jobs`` table.
    """
    # An engine is obtained for this call only and is always disposed in the
    # ``finally`` clause, whether the query succeeds or raises.
    db_engine = self._create_engine()
    try:
        async with db_engine.connect() as connection:
            pending_query = text(
                "SELECT url FROM jobs WHERE order_number IS NULL AND url IS NOT NULL"
            )
            query_result = await connection.execute(pending_query)
            pending_rows = query_result.fetchall()
            return [record.url for record in pending_rows]
    finally:
        await db_engine.dispose()