Skip to content

_sync_locations()

Class: JobsAustriaCacheSynchronizer
File: jobs_austria_cache_synchronizer.py · line 191

Maps url_location (zipcode) to locations.id and adds 'location_id' column. Read-only — locations table is pre-populated, no inserts needed.

Signature

Parameters df
Returns pd.DataFrame
Async No
Visibility Private

Implementation

def _sync_locations(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Resolve each row's url_location (zipcode) to a locations.id.

    The result is written to a new 'location_id' column on ``df`` itself
    (the frame is modified in place) and the same frame is returned.
    Only a SELECT is issued against the locations table; nothing is inserted.

    When the 'url_location' column is absent, or holds no non-null, truthy
    values, no query is run and 'location_id' is set to None for every row.
    Rows whose zipcode is not found in the lookup receive a missing value
    from ``Series.map``.
    """
    # Collect the distinct, usable zipcodes; stays empty if the column is absent.
    zip_codes = []
    if 'url_location' in df.columns:
        distinct_values = df['url_location'].dropna().unique()
        zip_codes = [value for value in distinct_values if value]

    # Nothing to look up, so skip the database round-trip entirely.
    if not zip_codes:
        df['location_id'] = None
        return df

    # An expanding bind parameter renders the list as an IN (...) clause.
    zips_param = bindparam("zips", expanding=True)
    query = text(
        "SELECT id, zipcode FROM locations WHERE zipcode IN :zips"
    ).bindparams(zips_param)

    with self.engine.connect() as conn:
        result = conn.execute(query, {"zips": zip_codes})
        matches = result.fetchall()

    zip_to_id = dict((match.zipcode, match.id) for match in matches)
    df['location_id'] = df['url_location'].map(zip_to_id)
    return df