update_cache_references()¶
Class: JobsAustriaCacheProcessRework
File: jobs_austria_cache_key_sync.py · line 57
Step 3: Perform Bulk CASE update to sync jobs_id back to scrape_cache.
Signature¶
| Parameters | df_enriched |
| Returns | not annotated |
| Async | No |
| Visibility | Public |
Implementation¶
def update_cache_references(self, df_enriched: pd.DataFrame) -> int:
    """Step 3: Perform Bulk CASE update to sync jobs_id back to scrape_cache.

    Builds a single ``UPDATE scrape_cache SET fk_job_id = CASE ... END
    WHERE id IN (...)`` statement so all matched rows are written back in
    one round trip instead of one UPDATE per row.

    Args:
        df_enriched: Frame containing at least the columns
            ``is_in_database`` (bool), ``scrape_cache_id`` and ``jobs_id``.

    Returns:
        int: Number of scrape_cache rows updated (0 when nothing matched).
    """
    # 1. Keep only rows that were matched against the jobs table.
    #    Use the boolean column directly instead of comparing '== True'.
    to_update = df_enriched.loc[
        df_enriched['is_in_database'], ['scrape_cache_id', 'jobs_id']
    ]
    if to_update.empty:
        logs.info("No records found in 'jobs' table to sync back to cache.")
        return 0  # Keep the return type consistent with the success path.

    # 2. One CASE arm per row; every value is bound as a named parameter
    #    (never interpolated into the SQL) to avoid injection/quoting issues.
    # NOTE(review): very large frames produce 2*N+N bound parameters — may
    # exceed driver parameter limits; consider chunking if N can be huge.
    case_parts = []
    params = {}
    ids = []
    for i, row in enumerate(to_update.itertuples()):
        s_key, j_key = f"s{i}", f"j{i}"
        case_parts.append(f"WHEN id = :{s_key} THEN :{j_key}")
        params[s_key] = row.scrape_cache_id
        params[j_key] = row.jobs_id
        ids.append(row.scrape_cache_id)
    case_stmt = " ".join(case_parts)

    # 3. An 'expanding' bindparam is SQLAlchemy's portable way to bind a
    #    list to an IN clause; passing a bare tuple to `IN :id_list` only
    #    happens to work on some drivers (e.g. psycopg2's tuple adaptation).
    from sqlalchemy import bindparam  # local import: only needed here
    query = text(f"""
        UPDATE scrape_cache
        SET fk_job_id = CASE {case_stmt} END
        WHERE id IN :id_list
    """).bindparams(bindparam("id_list", expanding=True))
    params["id_list"] = ids  # expanding param expects a list of values

    # engine.begin() commits on success and rolls back on any exception.
    with self.engine.begin() as connection:
        connection.execute(query, params)
    logs.info(f"Bulk update of {len(to_update)} rows completed.")
    return len(to_update)  # Count reported back to the caller.