Skip to content

update_cache_references()

Class: JobsAustriaCacheProcessRework
File: jobs_austria_cache_key_sync.py · line 57

Step 3: Perform Bulk CASE update to sync jobs_id back to scrape_cache.

Signature

Parameters df_enriched
Returns not annotated
Async No
Visibility Public

Implementation

def update_cache_references(self, df_enriched: pd.DataFrame) -> int:
    """Step 3: Perform Bulk CASE update to sync jobs_id back to scrape_cache.

    Rows of ``df_enriched`` whose ``is_in_database`` value equals ``True``
    are written back with a single ``UPDATE scrape_cache SET fk_job_id =
    CASE ... END`` statement, executed inside one ``self.engine.begin()``
    transaction.

    Args:
        df_enriched: Frame providing the ``is_in_database``,
            ``scrape_cache_id`` and ``jobs_id`` columns.

    Returns:
        The number of flagged rows submitted for update (``0`` when no row
        is flagged). This is the size of the selection, not the row count
        reported by the database.
    """
    # Element-wise comparison against True (rather than plain truthiness),
    # so only cells that compare equal to True are selected.
    matched = df_enriched.loc[
        df_enriched['is_in_database'].eq(True),
        ['scrape_cache_id', 'jobs_id'],
    ]

    if matched.empty:
        logs.info("No records found in 'jobs' table to sync back to cache.")
        return 0  # Callers always receive a count, even when nothing is synced.

    when_clauses = []
    id_placeholders = []
    params = {}

    # NOTE(review): values are passed to the driver exactly as pandas yields
    # them; if jobs_id is float-typed upstream, confirm the DB coerces it.
    for i, row in enumerate(matched.itertuples(index=False)):
        s_key, j_key = f"s{i}", f"j{i}"
        when_clauses.append(f"WHEN id = :{s_key} THEN :{j_key}")
        # Reuse the per-row id bind in the IN list. Binding one tuple to a
        # single plain parameter only works on drivers that adapt tuples
        # client-side; individual placeholders work on every driver.
        id_placeholders.append(f":{s_key}")
        params[s_key] = row.scrape_cache_id
        params[j_key] = row.jobs_id

    case_stmt = " ".join(when_clauses)
    in_list = ", ".join(id_placeholders)
    query = text(f"""
        UPDATE scrape_cache
        SET fk_job_id = CASE {case_stmt} END
        WHERE id IN ({in_list})
    """)

    # engine.begin() commits on success and rolls back if execute() raises.
    with self.engine.begin() as connection:
        connection.execute(query, params)

    logs.info(f"Bulk update of {len(matched)} rows completed.")
    return len(matched)