Files
lakehouse-personio/tap-personio/tap_personio/client.py
2025-06-03 16:22:21 +02:00

175 lines
5.2 KiB
Python

"""REST client handling, including PersonioStream base class."""
from __future__ import annotations
import decimal
import typing as t
from functools import cached_property
from importlib import resources
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import RESTStream
from tap_personio.auth import PersonioAuthenticator
from singer_sdk.pagination import BaseHATEOASPaginator, first
if t.TYPE_CHECKING:
import requests
from singer_sdk.helpers.types import Auth, Context
# Location of the "schemas" resource directory inside this package.
# TODO: Delete this if not using JSON files for schema definition.
SCHEMAS_DIR = resources.files(__package__) / "schemas"
class MyPaginator(BaseHATEOASPaginator):
    """Paginator that follows the "next" link embedded in the response body."""

    def get_next_url(self, response):
        """Look up the next-page URL in the JSON body of ``response``.

        Args:
            response: The HTTP response whose JSON body is searched with the
                JSONPath ``$._meta.links.next.href``.

        Returns:
            The first value matching the JSONPath, or ``None`` when the body
            contains no match.
        """
        next_links = extract_jsonpath(
            "$._meta.links.next.href",
            response.json(),
        )
        # Take the first match only; fall back to None if there is none.
        return next(iter(next_links), None)
class PersonioStream(RESTStream):
    """Personio stream class."""

    # Limit the number of results per page.
    # Max 50 according to Personio API documentation.
    RESULTS_PER_PAGE = 50

    # NOTE(review): not referenced anywhere in this class (pagination goes
    # through `get_new_paginator`); kept because subclasses may rely on it.
    NEXT_PAGE_JSONPATH = "$.cursor"

    # Update this value if necessary or override `parse_response`.
    records_jsonpath = "$._data[*]"

    @property
    def url_base(self) -> str:
        """Return the API URL root.

        The value is currently hard-coded; it is not read from the tap
        settings.

        Returns:
            The base URL that stream paths are appended to.
        """
        # TODO: retrieve this from self.config if it ever needs to vary.
        return "https://api.personio.de/v1"

    @cached_property
    def authenticator(self) -> Auth:
        """Return a new authenticator object.

        Cached, so the authenticator is created once per stream instance.

        Returns:
            An authenticator instance.
        """
        return PersonioAuthenticator.create_for_stream(self)

    @property
    def http_headers(self) -> dict:
        """Return the http headers needed.

        Both header values are read from the tap config and default to an
        empty string when the setting is absent.

        Returns:
            A dictionary of HTTP headers.
        """
        return {
            "X-Personio-Partner-ID": self.config.get("partner_id", ""),
            "X-Personio-App-ID": self.config.get("app_id", ""),
        }

    def get_new_paginator(self) -> BaseAPIPaginator:
        """Create a new pagination helper instance.

        For background on custom pagination classes, see the guide:
        https://sdk.meltano.com/en/v0.25.0/guides/pagination-classes.html.

        Returns:
            A pagination helper instance.
        """
        return MyPaginator()

    def get_url_params(
        self,
        context: Context | None,  # noqa: ARG002
        next_page_token: t.Any | None,  # noqa: ANN401
    ) -> dict[str, t.Any]:
        """Return a dictionary of values to be used in URL parameterization.

        Args:
            context: The stream context.
            next_page_token: The next page index or value. When set, its
                ``query`` attribute is parsed as a URL query string.

        Returns:
            A dictionary of URL query parameters.
        """
        # Imported here because the module-level imports do not provide it;
        # without this the second page request fails with a NameError.
        from urllib.parse import parse_qsl  # noqa: PLC0415

        params: dict = {}

        # The next page token is a parsed URL, so carry its query string over
        # into the parameters of the follow-up request.
        if next_page_token:
            params.update(parse_qsl(next_page_token.query))

        # Set the results limit last so it takes precedence over any `limit`
        # carried in the next-page link.
        params["limit"] = self.RESULTS_PER_PAGE

        # The Personio API has no sorting support, so no `sort`/`order_by`
        # parameters are sent even when a replication key is defined.
        return params

    def prepare_request_payload(
        self,
        context: Context | None,  # noqa: ARG002
        next_page_token: t.Any | None,  # noqa: ARG002, ANN401
    ) -> dict | None:
        """Prepare the data payload for the REST API request.

        No payload is sent: this always returns ``None``.

        Args:
            context: The stream context.
            next_page_token: The next page index or value.

        Returns:
            A dictionary with the JSON body for a POST request, or ``None``.
        """
        # TODO: Delete this method if no payload is required. (Most REST APIs.)
        return None

    def parse_response(self, response: requests.Response) -> t.Iterable[dict]:
        """Parse the response and return an iterator of result records.

        Args:
            response: The HTTP ``requests.Response`` object.

        Yields:
            Each record from the source.
        """
        # Parse floats as Decimal so numeric values keep their exact
        # textual precision instead of being rounded to binary floats.
        yield from extract_jsonpath(
            self.records_jsonpath,
            input=response.json(parse_float=decimal.Decimal),
        )

    def post_process(
        self,
        row: dict,
        context: Context | None = None,  # noqa: ARG002
    ) -> dict | None:
        """As needed, append or transform raw data to match expected structure.

        Currently a pass-through: the record is returned unchanged.

        Args:
            row: An individual record from the stream.
            context: The stream context.

        Returns:
            The updated record dictionary, or ``None`` to skip the record.
        """
        # TODO: Delete this method if not needed.
        return row