Added last_hire_date and assigned_unit_effective date logic for
worker/position
This commit is contained in:
14
meltano.yml
14
meltano.yml
@@ -9,14 +9,14 @@ environments:
|
||||
- name: tap-spreadsheets-anywhere
|
||||
config:
|
||||
tables:
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: departments
|
||||
pattern: Applicable Organizations.xlsx
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
key_properties: []
|
||||
format: excel
|
||||
worksheet_name: AO
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: positions
|
||||
pattern: "HR002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -26,7 +26,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 1000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: workers
|
||||
pattern: "HR006.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -36,7 +36,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_steps
|
||||
pattern: "PER001.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -46,7 +46,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_total_scoring
|
||||
pattern: "PER002.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -56,7 +56,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: performance_review_sub_scoring
|
||||
pattern: "PER003.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
@@ -66,7 +66,7 @@ environments:
|
||||
skip_initial: 8
|
||||
sample_rate: 1
|
||||
max_sampling_read: 25000
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
|
||||
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
|
||||
name: absenteisme
|
||||
pattern: "Absenteisme.*"
|
||||
start_date: '2000-01-01T00:00:00Z'
|
||||
|
||||
@@ -20,12 +20,13 @@ latest_departments AS (
|
||||
-- note: Positions ID is not unique, hence removed the full deduplication logic
|
||||
-- however, we had positions with same start date while both having primary position set to true, hence only selecting a random one for now (temp workaround)
|
||||
deduplicated_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date)
|
||||
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date, assigned_unit_effective_date)
|
||||
*
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
ORDER BY assigned_employee_id,
|
||||
assigned_employee_effective_date,
|
||||
assigned_employee_effective_date DESC,
|
||||
assigned_unit_effective_date DESC,
|
||||
CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC,
|
||||
_sdc_received_at DESC
|
||||
),
|
||||
@@ -44,7 +45,11 @@ position_details AS (
|
||||
w.contracting_company AS new_contracting_company,
|
||||
d.path::ltree AS new_department_path,
|
||||
d.manager_id::BIGINT AS new_manager_id,
|
||||
COALESCE(p.assigned_employee_effective_date, w.original_hire_date)::DATE AS new_job_effective_date, -- Use original hire date if position effective date is not available (this is the case when there is no position record)
|
||||
-- new job effective date:
|
||||
-- When both assigned_employee_effective_date and assigned_unit_effective_date are available, use the latest one
|
||||
-- When only one of those is available, we pick that one (greatest ignores null values)
|
||||
-- If none are available, we pick original_hire_date (this is the case if there is no position record)
|
||||
COALESCE(GREATEST(p.assigned_employee_effective_date, p.assigned_unit_effective_date), w.original_hire_date)::DATE AS new_job_effective_date,
|
||||
COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
|
||||
tw.id as worker_id
|
||||
FROM transformed_worker tw
|
||||
|
||||
@@ -23,7 +23,7 @@ latest_positions AS (
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
-- AND primary_position = 'Yes' -- Removed this filter, as there are employees with only non-primary positions
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, assigned_unit_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
),
|
||||
|
||||
-- Get the first position for each worker, to set the hire date
|
||||
@@ -32,13 +32,14 @@ first_positions AS (
|
||||
SELECT DISTINCT ON (assigned_employee_id) *
|
||||
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
|
||||
WHERE _sdc_deleted_at IS NULL
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date ASC
|
||||
ORDER BY assigned_employee_id, _sdc_received_at DESC, GREATEST(assigned_employee_effective_date, assigned_unit_effective_date) ASC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
|
||||
),
|
||||
|
||||
joined_data AS (
|
||||
SELECT
|
||||
w.user_id,
|
||||
w.birth_date::DATE AS date_of_birth,
|
||||
-- if birth_date is 01/01/1901, we consider it NULL
|
||||
NULLIF(w.birth_date::DATE, '1901-01-01') AS date_of_birth,
|
||||
w.gender,
|
||||
w.nationality,
|
||||
NULL::VARCHAR AS first_name, -- Not available
|
||||
@@ -53,7 +54,7 @@ joined_data AS (
|
||||
NULL::VARCHAR AS address_country,
|
||||
NULL::VARCHAR AS phone_number,
|
||||
NULL::VARCHAR AS driver_license,
|
||||
COALESCE(fp.assigned_employee_effective_date, w.original_hire_date)::DATE AS employment_start,
|
||||
COALESCE(GREATEST(fp.assigned_employee_effective_date, fp.assigned_unit_effective_date), GREATEST(w.original_hire_date, w.last_hire_date))::DATE AS employment_start,
|
||||
w.user_type AS employment_type,
|
||||
w.user_contract_type AS employment_contract_type,
|
||||
w.contracting_company AS employment_contracting_company,
|
||||
|
||||
Reference in New Issue
Block a user