Removing duplicated job changes - note, this is to cater for a data

error which we should resolve
This commit is contained in:
2025-07-14 20:49:41 +02:00
parent bc228215ec
commit 8df02c97e8
2 changed files with 25 additions and 5 deletions

View File

@@ -17,7 +17,15 @@ latest_departments AS (
FROM {{ ref('department') }} FROM {{ ref('department') }}
), ),
-- note: Positions ID is not unique, hence removed the deduplication logic -- note: Positions ID is not unique, hence removed the full deduplication logic
-- however, we had positions with same start date while both having primary position set to true, hence only selecting a random one for now (temp workaround)
deduplicated_positions AS (
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date)
*
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
WHERE _sdc_deleted_at IS NULL
ORDER BY assigned_employee_id, assigned_employee_effective_date, _sdc_received_at DESC
),
transformed_worker AS ( transformed_worker AS (
SELECT worker_hris_id, id SELECT worker_hris_id, id
@@ -33,11 +41,11 @@ position_details AS (
w.contracting_company AS new_contracting_company, w.contracting_company AS new_contracting_company,
d.path::ltree AS new_department_path, d.path::ltree AS new_department_path,
d.manager_id::BIGINT AS new_manager_id, d.manager_id::BIGINT AS new_manager_id,
p.assigned_employee_effective_date::DATE AS new_job_effective_date, COALESCE(p.assigned_employee_effective_date, w.original_hire_date)::DATE AS new_job_effective_date, -- Use original hire date if position effective date is not available (this is the case when there is no position record)
p.fte_utilized_by_employee_in_this_position * 100 AS new_fte_percentage, COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
tw.id as worker_id tw.id as worker_id
FROM transformed_worker tw FROM transformed_worker tw
LEFT JOIN {{ source('tap_spreadsheets_anywhere', 'positions') }} p LEFT JOIN deduplicated_positions p
ON p.assigned_employee_id = tw.worker_hris_id ON p.assigned_employee_id = tw.worker_hris_id
LEFT JOIN latest_departments d LEFT JOIN latest_departments d
ON p.assigned_unit_id = d.department_hris_id ON p.assigned_unit_id = d.department_hris_id

View File

@@ -9,6 +9,7 @@ WITH latest_workers AS (
SELECT DISTINCT ON (user_id) * SELECT DISTINCT ON (user_id) *
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }} FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
WHERE _sdc_deleted_at IS NULL WHERE _sdc_deleted_at IS NULL
AND user_id IS NOT NULL and user_id != '' -- Skipping empty user_ids
ORDER BY user_id, _sdc_received_at DESC ORDER BY user_id, _sdc_received_at DESC
), ),
@@ -23,6 +24,15 @@ latest_positions AS (
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC
), ),
-- Get the first position for each worker, to set the hire date
-- This is to fix the "original hire date" column of the All users report containing dates before the first position
first_positions AS (
SELECT DISTINCT ON (assigned_employee_id) *
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
WHERE _sdc_deleted_at IS NULL
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date ASC
),
joined_data AS ( joined_data AS (
SELECT SELECT
w.user_id, w.user_id,
@@ -41,7 +51,7 @@ joined_data AS (
NULL::VARCHAR AS address_country, NULL::VARCHAR AS address_country,
NULL::VARCHAR AS phone_number, NULL::VARCHAR AS phone_number,
NULL::VARCHAR AS driver_license, NULL::VARCHAR AS driver_license,
w.original_hire_date::DATE AS employment_start, COALESCE(fp.assigned_employee_effective_date, w.original_hire_date)::DATE AS employment_start,
w.user_type AS employment_type, w.user_type AS employment_type,
w.user_contract_type AS employment_contract_type, w.user_contract_type AS employment_contract_type,
w.contracting_company AS employment_contracting_company, w.contracting_company AS employment_contracting_company,
@@ -76,6 +86,8 @@ joined_data AS (
FROM latest_workers w FROM latest_workers w
LEFT JOIN latest_positions p LEFT JOIN latest_positions p
ON w.user_id = p.assigned_employee_id ON w.user_id = p.assigned_employee_id
LEFT JOIN first_positions fp
ON w.user_id = fp.assigned_employee_id
LEFT JOIN {{ ref('department') }} d -- Source = Department from Positions report, only relevant for active workers LEFT JOIN {{ ref('department') }} d -- Source = Department from Positions report, only relevant for active workers
ON p.assigned_unit_id = d.department_hris_id ON p.assigned_unit_id = d.department_hris_id
LEFT JOIN {{ ref('department') }} d2 -- Source = Business Unit from All Users report, only relevant for inactive workers LEFT JOIN {{ ref('department') }} d2 -- Source = Business Unit from All Users report, only relevant for inactive workers