Skip to content

Commit

Permalink
init: add bzip2 functions as a pg extension
Browse files Browse the repository at this point in the history
  • Loading branch information
steve-chavez committed Dec 15, 2023
0 parents commit 682aa4f
Show file tree
Hide file tree
Showing 17 changed files with 459 additions and 0 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs the regression suite (`make installcheck`) against every PostgreSQL
# major version listed in the matrix, inside the project's Nix shell.
name: CI

on: [push, pull_request]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        pg-version: ['12', '13', '14', '15', '16']

    steps:
      - uses: actions/checkout@v3
      - uses: cachix/install-nix-action@v18
        with:
          nix_path: nixpkgs=channel:nixos-unstable
      - name: Run tests
        # `with-pg-<version>` is the wrapper script provided by shell.nix; it
        # starts a throwaway cluster and then runs the given command.
        run: nix-shell --run "with-pg-${{ matrix.pg-version }} make installcheck"
      # On failure, print the regression diff. The Makefile only passes
      # --inputdir=test to pg_regress, so its output directory is the working
      # directory and the diff lives at ./regression.diffs (not output/).
      - if: ${{ failure() }}
        run: cat regression.diffs
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Local artifacts that should not be committed.
# NOTE(review): the origin of *.zst / *.csv files is not visible from here —
# presumably scratch data produced while trying the extension; confirm.
*.zst
*.csv
# Object files (the Makefile builds src/pg_bzip.o).
*.o
# Shell history file (shell.nix sets HISTFILE=.history).
.history
# Presumably pg_regress outputs written by `make installcheck` — confirm.
results/
regression.diffs
regression.out
20 changes: 20 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Copyright (c) 2023 Steve Chavez

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 changes: 27 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# PGXS-based build for the `bzip` PostgreSQL extension.
#
# EXTENSION  - extension name; also the shared library and control file name.
# EXTVERSION - extension version; substituted into the control file and used
#              to name the versioned install script.
EXTENSION = bzip
EXTVERSION = 0.1.0

# Extra prerequisites for the `all` target: the generated, versioned install
# script and the generated control file.
all: sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control

# The versioned install script is a plain copy of the unversioned source.
sql/$(EXTENSION)--$(EXTVERSION).sql: sql/$(EXTENSION).sql
	cp $< $@

# Generate the control file from its template. It depends on the template and
# on this Makefile (where EXTVERSION lives) so that it is regenerated whenever
# either of them changes instead of silently going stale.
$(EXTENSION).control: $(EXTENSION).control.in Makefile
	sed "s/@EXTVERSION@/$(EXTVERSION)/g" $< > $@

# Scripts to install. The versioned install script is listed explicitly because
# the wildcard is expanded when the Makefile is parsed: on a clean tree the
# generated file does not exist yet and would otherwise be left out.
# $(sort) removes the duplicate once the file exists.
DATA = $(sort $(wildcard sql/*--*.sql) sql/$(EXTENSION)--$(EXTVERSION).sql)

MODULE_big = $(EXTENSION)
OBJS = src/pg_bzip.o

# Every test/sql/<name>.sql file becomes the regression test <name>.
TESTS = $(wildcard test/sql/*.sql)
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
REGRESS_OPTS = --inputdir=test

PG_CONFIG = pg_config
# Link against libbz2.
SHLIB_LINK = -lbz2

PG_CFLAGS = -std=c99 -Wno-declaration-after-statement -Wall -Werror -Wshadow

PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
87 changes: 87 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# pg_bzip

## Motivation

If you obtain data compressed as bzip2, whether through HTTP (with [pgsql-http](https://github.com/pramsey/pgsql-http)) or from a file
(with [pgsql-fio](https://github.com/csimsek/pgsql-fio) or the native [pg_read_binary_file](https://pgpedia.info/p/pg_read_binary_file.html)), it's convenient to
decompress it in SQL directly. This extension does just that: it provides functions to decompress and compress data using bzip2.

## Functions

- `bzcat(data bytea) returns bytea`

This function mimics the [bzcat](https://linux.die.net/man/1/bzcat) command, which decompresses data using bzip2.

```sql
select convert_from(bzcat(pg_read_binary_file('/path/to/all_movies.csv.bz2')), 'utf8') as contents;

contents
--------------------------------------------------------------------------------------------------------------------------------------------
"id","name","parent_id","date" +
"2","Ariel","8384","1988-10-21" +
"3","Varjoja paratiisissa","8384","1986-10-17" +
"4","État de siège",\N,"1972-12-30" +
"5","Four Rooms",\N,"1995-12-22" +
"6","Judgment Night",\N,"1993-10-15" +
"8","Megacities - Life in Loops",\N,"2006-01-01" +
"9","Sonntag, im August",\N,"2004-09-22" +
"11","Star Wars: Episode IV – A New Hope","10","1977-05-25" +
"12","Finding Nemo","112246","2003-05-30" +
...
....
.....
```

- `bzip2(data bytea, compression_level int default 9) returns bytea`

This function is a simplified version of the [bzip2](https://linux.die.net/man/1/bzip2) command. It compresses data using bzip2.

For this example we'll use `fio_writefile` from [pgsql-fio](https://github.com/csimsek/pgsql-fio), which offers a convenient way to write a file from SQL.

```sql
select fio_writefile('/home/stevechavez/Projects/pg_bzip/my_text.bz2', bzip2(repeat('my secret text to be compressed', 1000)::bytea)) as writesize;

writesize
-----------
109
```

## Installation

The bzip2 development library is required. On Debian/Ubuntu you can install it with:

```
sudo apt install libbz2-dev
```

Then, from the root of this repo, run:

```
make && make install
```

Now, in SQL, you can run:

```
CREATE EXTENSION bzip;
```

`pg_bzip` is tested to work on PostgreSQL >= 12.

## Development

[Nix](https://nixos.org/download.html) is used to get an isolated and reproducible environment with multiple postgres versions.

```
# enter the Nix environment
$ nix-shell
# to run the tests
$ with-pg-16 make installcheck
# to interact with the isolated pg
$ with-pg-16 psql
# you can choose the pg version
$ with-pg-15 psql
```
2 changes: 2 additions & 0 deletions bzip.control.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Template for the extension control file. The Makefile runs sed over this
# file, replacing @EXTVERSION@ with the EXTVERSION value to produce bzip.control.
default_version = '@EXTVERSION@'
# The extension is declared non-relocatable.
relocatable = false
15 changes: 15 additions & 0 deletions nix/pgExtension.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Nix derivation that builds this repository as a PostgreSQL extension package.
#
# Arguments:
#   stdenv, postgresql, bzip2 - build environment and build inputs
#   extensionName             - used as the derivation name and as the base
#                               name of the control file that gets installed
{ stdenv, postgresql, bzip2, extensionName }:

stdenv.mkDerivation {
name = extensionName;

buildInputs = [ postgresql bzip2 ];

# Build from the repository root (this file lives in nix/).
src = ../.;

# Install the built shared object(s) into $out/lib, and the SQL scripts and
# the control file into $out/share/postgresql/extension.
installPhase = ''
install -D *.so -t $out/lib
install -D -t $out/share/postgresql/extension sql/*.sql
install -D -t $out/share/postgresql/extension ${extensionName}.control
'';
}
29 changes: 29 additions & 0 deletions nix/pgScript.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Builds a `with-pg-<major version>` wrapper script. The script initializes a
# throwaway PostgreSQL cluster in a temporary directory, starts the server on a
# unix socket inside that directory, copies the sample files next to it, runs
# the command given as arguments and finally stops the server and removes the
# directory.
#
# Arguments:
#   postgresql          - the PostgreSQL package whose binaries are put on PATH
#   writeShellScriptBin - nixpkgs helper used to create the script
#   options             - extra server options passed to `pg_ctl start -o`
#                         (default: none)
{ postgresql, writeShellScriptBin, options ? "" } :

let
  # First component of the PostgreSQL version; used in the script name.
  ver = builtins.head (builtins.splitVersion postgresql.version);
  script = ''
    export PATH=${postgresql}/bin:"$PATH"
    tmpdir="$(mktemp -d)"
    export PGDATA="$tmpdir"
    export PGHOST="$tmpdir"
    export PGUSER=postgres
    export PGDATABASE=postgres
    # Always remove the temporary cluster, even when stopping the server fails
    # (e.g. it never started); chaining with "&&" would leak the directory.
    trap 'pg_ctl stop -m i; rm -rf "$tmpdir"' sigint sigterm exit
    PGTZ=UTC initdb --no-locale --encoding=UTF8 --nosync -U "$PGUSER"
    default_options="-F -c listen_addresses=\"\" -k $PGDATA"
    pg_ctl start -o "$default_options" -o "${options}"
    cp ${../test/samples/all_movies.csv} "$tmpdir/all_movies.csv"
    cp ${../test/samples/all_movies.csv.bz2} "$tmpdir/all_movies.csv.bz2"
    "$@"
  '';
in
writeShellScriptBin "with-pg-${ver}" script
20 changes: 20 additions & 0 deletions nix/pgsql-fio.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Nix derivation for the third-party pgsql-fio PostgreSQL extension, fetched
# from GitHub (csimsek/pgsql-fio) at a pinned revision.
{ stdenv, lib, fetchFromGitHub, postgresql }:

# `rec` lets `repo` below reuse the `name` attribute.
stdenv.mkDerivation rec {
name = "pgsql-fio";

buildInputs = [ postgresql ];

# Source pinned by commit hash and content hash.
src = fetchFromGitHub {
owner = "csimsek";
repo = name;
rev = "9f6133c7ac4c50a14cf983943cb9916f994034bd";
hash = "sha256-uoWoFfm8iM/FzBtIH5SF6TPRhDXDMVftueWjMYggiJY=";
};

# Install the shared object into $out/lib, and the SQL script and control file
# into $out/share/postgresql/extension.
installPhase = ''
install -D fio.so -t $out/lib
install -D fio--1.0.sql -t $out/share/postgresql/extension
install -D fio.control -t $out/share/postgresql/extension
'';
}
29 changes: 29 additions & 0 deletions shell.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Development shell. Brings one `with-pg-<version>` script (see
# nix/pgScript.nix) into scope for every PostgreSQL version listed below, each
# backed by a PostgreSQL that has this extension and pgsql-fio available.
#
# nixpkgs is pinned to a fixed commit tarball with a content hash.
with import (builtins.fetchTarball {
name = "2023-09-16";
url = "https://github.com/NixOS/nixpkgs/archive/ae5b96f3ab6aabb60809ab78c2c99f8dd51ee678.tar.gz";
sha256 = "11fpdcj5xrmmngq0z8gsc3axambqzvyqkfk23jn3qkx9a5x56xxk";
}) {};
mkShell {
buildInputs =
let
extensionName = "bzip";
supportedPgVersions = [
postgresql_16
postgresql_15
postgresql_14
postgresql_13
postgresql_12
];
# Given a PostgreSQL package, returns that PostgreSQL bundled with the
# extension packages listed below.
pgWExtension = { postgresql }:
postgresql.withPackages (p: [
(callPackage ./nix/pgExtension.nix { inherit postgresql extensionName; })
(callPackage ./nix/pgsql-fio.nix { inherit postgresql; }) # only used for manual tests where writing to a file is required
]);
# One wrapper script per supported PostgreSQL version.
extAll = map (x: callPackage ./nix/pgScript.nix { postgresql = pgWExtension { postgresql = x;}; }) supportedPgVersions;
in
extAll;

# Keep the shell history in a project-local file.
shellHook = ''
export HISTFILE=.history
'';
}
13 changes: 13 additions & 0 deletions sql/bzip.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- bzcat(data): decompresses bzip2-compressed `data` and returns the
-- decompressed bytes.
--
-- Implemented in C in the `bzip` shared library; no link symbol is given, so
-- the C symbol looked up has the same name as the SQL function.
-- `strict`: the function is not called for NULL input, the result is NULL.
--
-- Plain CREATE (not CREATE OR REPLACE) is used on purpose: an extension
-- install script should never silently take over a pre-existing object.
create function bzcat(data bytea)
returns bytea
language c
immutable
strict
as 'bzip';

-- bzip2(data, compression_level): compresses `data` with bzip2 and returns
-- the compressed bytes. `compression_level` defaults to 9.
--
-- Implemented in C in the `bzip` shared library; no link symbol is given, so
-- the C symbol looked up has the same name as the SQL function.
-- `strict`: the function is not called if any argument is NULL, the result is
-- NULL.
--
-- Plain CREATE (not CREATE OR REPLACE) is used on purpose: an extension
-- install script should never silently take over a pre-existing object.
create function bzip2(data bytea, compression_level int default 9)
returns bytea
language c
immutable
strict
as 'bzip';
Loading

0 comments on commit 682aa4f

Please sign in to comment.