Compare commits

...

6 Commits

Author SHA1 Message Date
b3ad962b41 Move docker registry to gitea 2025-08-20 10:41:15 +02:00
eaaf51f699 Add logic to add "not started" steps for performance reviews 2025-08-12 14:00:45 +02:00
461a7eb9aa Split out Performance Review 2024 into Generic and n-1 managers 2025-08-12 13:04:44 +02:00
9a7059dcdf Ignore CAD and CSAD departments, no workers 2025-08-12 12:06:36 +02:00
735250c4c7 Added last_hire_date and assigned_unit_effective date logic for
worker/position
2025-08-12 10:24:53 +02:00
aa19a53d51 Minor fixes after workshop
Worker: Also consider non-primary positions for the department
identification
Worker: Don't load users with non-numeric IDs
Job change: Select position with primary = Yes, if it exists
2025-07-18 11:35:15 +02:00
9 changed files with 220 additions and 30 deletions

View File

@@ -17,3 +17,75 @@ TRUNCATE TABLE raw.performance_review_sub_scoring;
TRUNCATE TABLE raw.performance_review_total_scoring;
TRUNCATE TABLE raw.positions;
TRUNCATE TABLE raw.workers;
-- Drop the raw landing tables entirely (destructive: removes the tables themselves, not just their rows).
-- IF EXISTS makes each statement a no-op with a notice when the table is absent,
-- so the list can be re-run without aborting on the first missing table.
DROP TABLE IF EXISTS raw.absenteisme;
DROP TABLE IF EXISTS raw.departments;
DROP TABLE IF EXISTS raw.performance_review_steps;
DROP TABLE IF EXISTS raw.performance_review_sub_scoring;
DROP TABLE IF EXISTS raw.performance_review_total_scoring;
DROP TABLE IF EXISTS raw.positions;
DROP TABLE IF EXISTS raw.workers;
Data quality checks:
-- Total worker count
SELECT COUNT(*)
FROM clean.worker AS w;

-- Active workers: rows without an employment exit date
SELECT COUNT(*)
FROM clean.worker AS w
WHERE w.employment_exit_date IS NULL;
-- Number of job change / position change records:
-- distribution of workers by how many clean.job_change rows they have.
WITH history_counts AS (
    SELECT
        w.id,
        -- Count the joined column rather than COUNT(*): the LEFT JOIN emits one
        -- all-NULL row for a worker without job changes, which COUNT(*) would
        -- report as 1 instead of 0.
        COUNT(jc.worker_id) AS history_count
    FROM clean.worker AS w
    LEFT JOIN clean.job_change AS jc
        ON jc.worker_id = w.id
    GROUP BY w.id
)
SELECT
    history_count,
    COUNT(*)
FROM history_counts
GROUP BY history_count
ORDER BY history_count;
-- Years at the company:
-- distribution of workers by whole years between employment_start and the
-- exit date (or today when no exit date is set).
WITH yac AS (
    SELECT
        w.id,
        EXTRACT(
            YEAR FROM AGE(COALESCE(w.employment_exit_date, CURRENT_DATE), w.employment_start)
        ) AS years_at_company
    FROM clean.worker AS w
)
SELECT
    yac.years_at_company,
    COUNT(*)
FROM yac
GROUP BY yac.years_at_company
-- The terminating semicolon is required: without it this statement runs into
-- the next WITH query and fails to parse.
ORDER BY yac.years_at_company;
-- Workers whose computed years at the company fall outside the 0..60 range
-- (i.e. < 0 years or > 60 years)
WITH tenure AS (
    SELECT
        w.id,
        w.worker_hris_id,
        w.employment_start,
        w.employment_exit_date,
        EXTRACT(
            'YEAR' FROM AGE(COALESCE(w.employment_exit_date, CURRENT_DATE), w.employment_start)
        ) AS years_at_company
    FROM clean.worker AS w
)
SELECT
    t.id,
    t.worker_hris_id,
    t.employment_start,
    t.employment_exit_date,
    t.years_at_company
FROM tenure AS t
WHERE t.years_at_company NOT BETWEEN 0 AND 60;
-- Performance review: number of steps loaded, per cycle name and step name,
-- listed in step sequence order within each cycle
-- NOTE(review): these tables are not schema-qualified, unlike the clean.* queries
-- in this script; presumably resolved via search_path. Confirm.
SELECT
    pc.name,
    prs.name,
    COUNT(*)
FROM performance_review_step AS prs
INNER JOIN performance_review AS pr
    ON pr.id = prs.review_id
INNER JOIN performance_cycle AS pc
    ON pc.id = pr.cycle_id
GROUP BY
    pc.name,
    prs.name,
    prs.sequence_number
ORDER BY
    pc.name,
    prs.sequence_number;
-- Steps not linked to a review (review_id is missing)
SELECT COUNT(*)
FROM performance_review_step AS prs
WHERE prs.review_id IS NULL;

View File

@@ -1,5 +1,5 @@
x-meltano-image: &meltano-image
image: docker.hrlakehouse.com/lakehouse/sarens-integration:latest
image: gitea.jvtech.be/lakehouse/sarens-integration:latest
services:
meltano:

View File

@@ -9,14 +9,14 @@ environments:
- name: tap-spreadsheets-anywhere
config:
tables:
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: departments
pattern: Applicable Organizations.xlsx
start_date: '2000-01-01T00:00:00Z'
key_properties: []
format: excel
worksheet_name: AO
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: positions
pattern: "HR002.*"
start_date: '2000-01-01T00:00:00Z'
@@ -26,7 +26,7 @@ environments:
skip_initial: 8
sample_rate: 1
max_sampling_read: 1000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: workers
pattern: "HR006.*"
start_date: '2000-01-01T00:00:00Z'
@@ -36,7 +36,7 @@ environments:
skip_initial: 8
sample_rate: 1
max_sampling_read: 25000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: performance_review_steps
pattern: "PER001.*"
start_date: '2000-01-01T00:00:00Z'
@@ -46,7 +46,7 @@ environments:
skip_initial: 8
sample_rate: 1
max_sampling_read: 25000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: performance_review_total_scoring
pattern: "PER002.*"
start_date: '2000-01-01T00:00:00Z'
@@ -56,7 +56,7 @@ environments:
skip_initial: 8
sample_rate: 1
max_sampling_read: 25000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: performance_review_sub_scoring
pattern: "PER003.*"
start_date: '2000-01-01T00:00:00Z'
@@ -66,7 +66,7 @@ environments:
skip_initial: 8
sample_rate: 1
max_sampling_read: 25000
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-06-11/original/
- path: file://C:/Users/vdsje/OneDrive/LakeHouse/Sarens/Data/2025-08-08/original/
name: absenteisme
pattern: "Absenteisme.*"
start_date: '2000-01-01T00:00:00Z'

View File

@@ -13,6 +13,7 @@ latest_departments as (
from {{ source('tap_spreadsheets_anywhere', 'departments') }}
) t
where rn = 1
and id not in ('CAD', 'CSAD')
),
department_tree as (
-- Anchor: top-level department (parent_id is set to Sarens Group in the Excel)

View File

@@ -20,11 +20,15 @@ latest_departments AS (
-- note: Positions ID is not unique, hence removed the full deduplication logic
-- however, we had positions with same start date while both having primary position set to true, hence only selecting a random one for now (temp workaround)
deduplicated_positions AS (
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date)
SELECT DISTINCT ON (assigned_employee_id, assigned_employee_effective_date, assigned_unit_effective_date)
*
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
WHERE _sdc_deleted_at IS NULL
ORDER BY assigned_employee_id, assigned_employee_effective_date, _sdc_received_at DESC
ORDER BY assigned_employee_id,
assigned_employee_effective_date DESC,
assigned_unit_effective_date DESC,
CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC,
_sdc_received_at DESC
),
transformed_worker AS (
@@ -41,7 +45,11 @@ position_details AS (
w.contracting_company AS new_contracting_company,
d.path::ltree AS new_department_path,
d.manager_id::BIGINT AS new_manager_id,
COALESCE(p.assigned_employee_effective_date, w.original_hire_date)::DATE AS new_job_effective_date, -- Use original hire date if position effective date is not available (this is the case when there is no position record)
-- new job effective date:
-- When both assigned_employee_effective_date and assigned_unit_effective_date are available, use the latest one
-- When only one of those is available, we pick that one (greatest ignores null values)
-- If none are available, we pick original_hire_date (this is the case if there is no position record)
COALESCE(GREATEST(p.assigned_employee_effective_date, p.assigned_unit_effective_date), w.original_hire_date)::DATE AS new_job_effective_date,
COALESCE(p.fte_utilized_by_employee_in_this_position * 100, 100) AS new_fte_percentage, -- Default to 100% if not specified
tw.id as worker_id
FROM transformed_worker tw

View File

@@ -17,13 +17,46 @@ with distinct_cycles as (
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
--where is_not_removed_from_task = 1
group by task_name
),
base_records as (
select
dense_rank() over (order by task_name)::bigint as id,
cycle_start_date::date as start_date,
cycle_end_date::date as end_date,
task_name as name,
'Closed' as status, -- overwritten logic for Sarens
'annual' as type
from distinct_cycles
),
-- Generate additional records for Performance Review 2024
additional_records as (
select
(select max(id) from base_records) + 1 as id,
start_date,
end_date,
'Performance Review 2024 - Generic' as name,
status,
type
from base_records
where name = 'Performance Review 2024'
union all
select
(select max(id) from base_records) + 2 as id,
start_date,
end_date,
'Performance Review 2024 - n-1 managers' as name,
status,
type
from base_records
where name = 'Performance Review 2024'
)
select
dense_rank() over (order by task_name)::bigint as id,
cycle_start_date::date as start_date,
cycle_end_date::date as end_date,
task_name as name,
'Closed' as status, -- overwritten logic for Sarens
'annual' as type
from distinct_cycles
-- Combine original records with additional records
select * from base_records
where name != 'Performance Review 2024'
union all
select * from additional_records

View File

@@ -11,6 +11,28 @@ with step_agg as (
group by 1,2
),
-- When task_name = "Performance Review 2024", we need to update it to either "Performance Review 2024 - Generic" or "Performance Review 2024 - n-1 managers"
-- This is determined per user_id: if the user has a 'Performance Review 2024' step whose title is not one of 'Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off' or 'Employee Sign-Off', the review maps to "n-1 managers"; otherwise it maps to "Generic"
remapped_steps as (
select
user_id,
case
when task_name != 'Performance Review 2024'
then task_name
when exists (
select 1
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }} s
where s.user_id = step_agg.user_id
and s.task_name = 'Performance Review 2024'
and s.step_title not in ('Employee Self Review', 'Manager Review', 'Performance Conversation & Manager Sign-off', 'Employee Sign-Off')
) then 'Performance Review 2024 - n-1 managers'
else 'Performance Review 2024 - Generic'
end as task_name,
completed_at,
status
from step_agg
),
total_scoring as (
select
user_id::text as user_id,
@@ -39,7 +61,7 @@ combined as (
ts.overall_rating,
w.worker_id,
c.cycle_id
from step_agg s
from remapped_steps s
left join total_scoring ts using (user_id, task_name)
left join worker_map w on w.worker_hris_id = s.user_id
left join cycle_map c on c.name = s.task_name

View File

@@ -19,10 +19,10 @@ steps as (
step_title as name,
case
when step_title in ('Auto-évaluation des employés', 'Employee Self Review', 'Sarens Projects Review',
'Approbation des employés', 'Strategy Review', 'Finance Review',
'Sales Review', 'Fleet Review', 'Self Review', 'LCM Review', 'Operations Review') then 1
'Approbation des employés',
'Self Review') then 1
when step_title in ('Évaluation du manager', 'Functional Manager Review', 'Discussion et approbation du manager',
'Manager Review') then 2
'Manager Review', 'Strategy Review', 'Finance Review', 'Sales Review', 'Fleet Review', 'LCM Review', 'Operations Review') then 2
when step_title in ('HR Review', 'SHEQ Review', 'Performance Conversation & Manager Sign-off') then 3
when step_title = 'Employee Sign-Off' then 4
else null -- fallback for any unexpected values
@@ -31,8 +31,55 @@ steps as (
step_submission_date::date as completed_at
from {{ source('tap_spreadsheets_anywhere', 'performance_review_steps') }}
where is_not_removed_from_task = 1
),
mandatory_steps as (
select 'Employee Self Review' as name, 1 as sequence_number
union all select 'Manager Review', 2
union all select 'Performance Conversation & Manager Sign-off', 3
union all select 'Employee Sign-Off', 4
),
-- Get all reviews that match our criteria
filtered_reviews as (
select distinct
r.review_id,
r.user_id,
r.task_name
from review_base r
where r.task_name like 'Performance Review 2024%'
),
-- Generate all expected steps for these reviews
expected_steps as (
select
r.review_id,
r.user_id,
r.task_name,
m.name,
m.sequence_number
from filtered_reviews r
cross join mandatory_steps m
),
-- Find which expected steps are missing from the source data
missing_steps as (
select
e.user_id,
e.task_name,
e.name,
e.sequence_number,
'Not started' as status,
null::date as completed_at
from expected_steps e
left join steps s
on e.user_id = s.user_id
and e.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1
and e.name = s.name
where s.user_id is null
)
-- Combine existing steps with missing steps
select
row_number() over (order by s.user_id, s.task_name, s.name)::bigint as id,
r.review_id,
@@ -41,7 +88,11 @@ select
s.status,
s.completed_at,
null::date as due
from steps s
from (
select * from steps
union all
select * from missing_steps
) s
left join review_base r
on r.user_id = s.user_id
and r.task_name = s.task_name
and r.task_name like s.task_name || '%' -- Also map for remapped cycle generic/n-1

View File

@@ -10,6 +10,8 @@ WITH latest_workers AS (
FROM {{ source('tap_spreadsheets_anywhere', 'workers') }}
WHERE _sdc_deleted_at IS NULL
AND user_id IS NOT NULL and user_id != '' -- Skipping empty user_ids
-- Skipping users with non-numeric user_id, as they are not valid
AND user_id ~ '^[0-9]+$'
ORDER BY user_id, _sdc_received_at DESC
),
@@ -20,8 +22,8 @@ latest_positions AS (
SELECT DISTINCT ON (assigned_employee_id) *
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
WHERE _sdc_deleted_at IS NULL
AND primary_position = 'Yes'
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC
-- AND primary_position = 'Yes' -- Removed this filter, as there are employees with only non-primary positions
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date DESC, assigned_unit_effective_date DESC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
),
-- Get the first position for each worker, to set the hire date
@@ -30,13 +32,14 @@ first_positions AS (
SELECT DISTINCT ON (assigned_employee_id) *
FROM {{ source('tap_spreadsheets_anywhere', 'positions') }}
WHERE _sdc_deleted_at IS NULL
ORDER BY assigned_employee_id, _sdc_received_at DESC, assigned_employee_effective_date ASC
ORDER BY assigned_employee_id, _sdc_received_at DESC, GREATEST(assigned_employee_effective_date, assigned_unit_effective_date) ASC, CASE WHEN primary_position = 'Yes' THEN 1 ELSE 0 END DESC
),
joined_data AS (
SELECT
w.user_id,
w.birth_date::DATE AS date_of_birth,
-- if birth_date is 01/01/1901, we consider it NULL
NULLIF(w.birth_date::DATE, '1901-01-01') AS date_of_birth,
w.gender,
w.nationality,
NULL::VARCHAR AS first_name, -- Not available
@@ -51,7 +54,7 @@ joined_data AS (
NULL::VARCHAR AS address_country,
NULL::VARCHAR AS phone_number,
NULL::VARCHAR AS driver_license,
COALESCE(fp.assigned_employee_effective_date, w.original_hire_date)::DATE AS employment_start,
COALESCE(GREATEST(fp.assigned_employee_effective_date, fp.assigned_unit_effective_date), GREATEST(w.original_hire_date, w.last_hire_date))::DATE AS employment_start,
w.user_type AS employment_type,
w.user_contract_type AS employment_contract_type,
w.contracting_company AS employment_contracting_company,