Skip to content

get_pending_cache()

Class: JobsAustriaCacheProcessRework
File: jobs_austria_cache_key_sync.py · line 32

Step 1: Extract up to 1000 rows from scrape_cache where fk_job_id is NULL.

Signature

Parameters none
Returns pd.DataFrame
Async No
Visibility Public

Implementation

def get_pending_cache(self) -> pd.DataFrame:
    """Step 1: pull the next batch of scrape_cache rows not yet linked to a job.

    Selects ``id`` (aliased to ``scrape_cache_id``) and ``url_hash`` from
    ``scrape_cache`` for rows whose ``fk_job_id`` is NULL, capped at 1000
    rows by the query's ``LIMIT``. The query has no ``ORDER BY``, so which
    rows make up the batch is left to the database.

    Returns:
        The result of ``PandasExtractor.run()`` for this query (annotated
        as a ``pd.DataFrame``).
    """
    # Adjacent literals concatenate into a single one-line SQL statement.
    sql = (
        "SELECT id as scrape_cache_id, url_hash "
        "FROM scrape_cache "
        "WHERE fk_job_id IS NULL "
        "LIMIT 1000"
    )

    # Pre-bind the query so the extractor receives a ready-to-use strategy.
    strategy = partial(self._mysql_extraction_strategy, query=sql)
    return PandasExtractor(extraction_strategy=strategy).run()