-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
init: add bzip2 functions as a pg extension
- Loading branch information
0 parents
commit 682aa4f
Showing
17 changed files
with
459 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Continuous integration: runs the regression suite once per PostgreSQL
# major version listed in the matrix, inside the project's Nix shell.
name: CI

on: [push, pull_request]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        pg-version: ['12', '13', '14', '15', '16']

    steps:
      - uses: actions/checkout@v3
      - uses: cachix/install-nix-action@v18
        with:
          nix_path: nixpkgs=channel:nixos-unstable
      - name: Run tests
        run: nix-shell --run "with-pg-${{ matrix.pg-version }} make installcheck"
      # pg_regress writes regression.diffs into its output directory, which
      # defaults to the working directory because the Makefile passes only
      # --inputdir. Printing it here surfaces the failing diff in the job log.
      - name: Show regression diffs
        if: ${{ failure() }}
        run: cat regression.diffs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
*.zst | ||
*.csv | ||
*.o | ||
.history | ||
results/ | ||
regression.diffs | ||
regression.out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Copyright (c) 2023 Steve Chavez | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included | ||
in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# PGXS build for the bzip extension: generates the versioned install script
# and the control file, builds the shared library from src/pg_bzip.c and wires
# up the regression tests found under test/sql.
EXTENSION = bzip
EXTVERSION = 0.1.0

all: sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control

# The versioned install script is a plain copy of the unversioned source.
sql/$(EXTENSION)--$(EXTVERSION).sql: sql/$(EXTENSION).sql
	cp $< $@

# Regenerate the control file whenever its template or this Makefile (which
# holds EXTVERSION) changes. Without prerequisites an existing control file
# would never be rebuilt and could keep advertising a stale default_version.
$(EXTENSION).control: $(EXTENSION).control.in Makefile
	sed "s/@EXTVERSION@/$(EXTVERSION)/g" $< > $@

DATA = $(wildcard sql/*--*.sql)

MODULE_big = $(EXTENSION)
OBJS = src/pg_bzip.o

# One regression test per file in test/sql, named after the file's basename.
TESTS = $(wildcard test/sql/*.sql)
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
REGRESS_OPTS = --inputdir=test

PG_CONFIG = pg_config
SHLIB_LINK = -lbz2

PG_CFLAGS = -std=c99 -Wno-declaration-after-statement -Wall -Werror -Wshadow

PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# pg_bzip | ||
|
||
## Motivation | ||
|
||
If you obtain data compressed as bzip2, whether through HTTP (with [pgsql-http](https://github.com/pramsey/pgsql-http)) or from a file | ||
(with [pgsql-fio](https://github.com/csimsek/pgsql-fio) or the native [pg_read_binary_file](https://pgpedia.info/p/pg_read_binary_file.html)), it's convenient to | ||
decompress it in SQL directly. This extension does just that: it provides functions to decompress and compress data using bzip2. | ||
|
||
## Functions | ||
|
||
- `bzcat(data bytea) returns bytea` | ||
|
||
This function mimics the [bzcat](https://linux.die.net/man/1/bzcat) command, which decompresses data using bzip2. | ||
|
||
```sql | ||
select convert_from(bzcat(pg_read_binary_file('/path/to/all_movies.csv.bz2')), 'utf8') as contents; | ||
|
||
contents | ||
-------------------------------------------------------------------------------------------------------------------------------------------- | ||
"id","name","parent_id","date" + | ||
"2","Ariel","8384","1988-10-21" + | ||
"3","Varjoja paratiisissa","8384","1986-10-17" + | ||
"4","État de siège",\N,"1972-12-30" + | ||
"5","Four Rooms",\N,"1995-12-22" + | ||
"6","Judgment Night",\N,"1993-10-15" + | ||
"8","Megacities - Life in Loops",\N,"2006-01-01" + | ||
"9","Sonntag, im August",\N,"2004-09-22" + | ||
"11","Star Wars: Episode IV – A New Hope","10","1977-05-25" + | ||
"12","Finding Nemo","112246","2003-05-30" + | ||
... | ||
.... | ||
..... | ||
``` | ||
|
||
- `bzip2(data bytea, compression_level int default 9) returns bytea` | ||
|
||
This function is a simplified version of the [bzip2](https://linux.die.net/man/1/bzip2) command. It compresses data using bzip2. | ||
|
||
For this example we'll use `fio_writefile` from [pgsql-fio](https://github.com/csimsek/pgsql-fio), which offers a convenient way to write a file from SQL. | ||
|
||
```sql | ||
select fio_writefile('/home/stevechavez/Projects/pg_bzip/my_text.bz2', bzip2(repeat('my secret text to be compressed', 1000)::bytea)) as writesize; | ||
|
||
writesize | ||
----------- | ||
109 | ||
``` | ||
|
||
## Installation | ||
|
||
bzip2 is required. Under Debian/Ubuntu you can get it with | ||
|
||
``` | ||
sudo apt install libbz2-dev | ||
``` | ||
|
||
Then on this repo | ||
|
||
``` | ||
make && make install | ||
``` | ||
|
||
Now on SQL you can do: | ||
|
||
``` | ||
CREATE EXTENSION bzip; | ||
``` | ||
|
||
`pg_bzip` is tested to work on PostgreSQL >= 12. | ||
|
||
## Development | ||
|
||
[Nix](https://nixos.org/download.html) is used to get an isolated and reproducible environment with multiple postgres versions. | ||
|
||
``` | ||
# enter the Nix environment | ||
$ nix-shell | ||
# to run the tests | ||
$ with-pg-16 make installcheck | ||
# to interact with the isolated pg | ||
$ with-pg-16 psql | ||
# you can choose the pg version | ||
$ with-pg-15 psql | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
default_version = '@EXTVERSION@' | ||
relocatable = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Derivation for the extension in this repository: builds from the parent
# directory of this file and installs the shared library, the SQL scripts and
# the control file into the layout used for PostgreSQL extensions.
{ stdenv, postgresql, bzip2, extensionName }:

stdenv.mkDerivation {
  name = extensionName;
  src = ../.;

  buildInputs = [ postgresql bzip2 ];

  # Custom install step: copy the build outputs under $out.
  installPhase = ''
    install -D *.so -t $out/lib
    install -D -t $out/share/postgresql/extension sql/*.sql
    install -D -t $out/share/postgresql/extension ${extensionName}.control
  '';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Produces a `with-pg-<major>` wrapper script that starts a throwaway
# PostgreSQL cluster in a fresh temporary directory, copies the sample files
# into it, runs the given command against it and tears everything down on exit.
#
# Arguments:
#   postgresql          - the PostgreSQL package whose binaries are put on PATH
#   writeShellScriptBin - nixpkgs helper used to create the wrapper
#   options             - extra server options passed to `pg_ctl start -o`
{ postgresql, writeShellScriptBin, options ? "" } :

let
  # Major version, e.g. "16" from "16.0"; used in the wrapper's name.
  ver = builtins.head (builtins.splitVersion postgresql.version);
  script = ''
    export PATH=${postgresql}/bin:"$PATH"
    tmpdir="$(mktemp -d)"
    export PGDATA="$tmpdir"
    export PGHOST="$tmpdir"
    export PGUSER=postgres
    export PGDATABASE=postgres
    # Always remove the temporary directory, even when stopping the server
    # fails (e.g. it never started, or the trap already ran for a signal and
    # now runs again for exit). Chaining with && would leak the directory.
    trap 'pg_ctl stop -m i; rm -rf "$tmpdir"' sigint sigterm exit
    PGTZ=UTC initdb --no-locale --encoding=UTF8 --nosync -U "$PGUSER"
    default_options="-F -c listen_addresses=\"\" -k $PGDATA"
    pg_ctl start -o "$default_options" -o "${options}"
    cp ${../test/samples/all_movies.csv} "$tmpdir"/all_movies.csv
    cp ${../test/samples/all_movies.csv.bz2} "$tmpdir"/all_movies.csv.bz2
    "$@"
  '';
in
writeShellScriptBin "with-pg-${ver}" script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Derivation for the pgsql-fio extension, fetched from GitHub at a pinned
# revision and installed into the PostgreSQL extension layout.
{ stdenv, lib, fetchFromGitHub, postgresql }:

let
  # Used both as the derivation name and as the GitHub repository name.
  name = "pgsql-fio";
in
stdenv.mkDerivation {
  inherit name;

  src = fetchFromGitHub {
    owner = "csimsek";
    repo = name;
    rev = "9f6133c7ac4c50a14cf983943cb9916f994034bd";
    hash = "sha256-uoWoFfm8iM/FzBtIH5SF6TPRhDXDMVftueWjMYggiJY=";
  };

  buildInputs = [ postgresql ];

  # Custom install step: copy the library, SQL script and control file.
  installPhase = ''
    install -D fio.so -t $out/lib
    install -D fio--1.0.sql -t $out/share/postgresql/extension
    install -D fio.control -t $out/share/postgresql/extension
  '';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Development shell. Pins nixpkgs to a fixed archive and provides one wrapper
# script (built by ./nix/pgScript.nix) per supported PostgreSQL version, each
# backed by a PostgreSQL that has this extension and pgsql-fio installed.
with import (builtins.fetchTarball {
  name = "2023-09-16";
  url = "https://github.com/NixOS/nixpkgs/archive/ae5b96f3ab6aabb60809ab78c2c99f8dd51ee678.tar.gz";
  sha256 = "11fpdcj5xrmmngq0z8gsc3axambqzvyqkfk23jn3qkx9a5x56xxk";
}) {};
mkShell {
  buildInputs =
    let
      extensionName = "bzip";
      pgVersions = [
        postgresql_16
        postgresql_15
        postgresql_14
        postgresql_13
        postgresql_12
      ];
      # A PostgreSQL package extended with the extensions needed here.
      withExtensions = { postgresql }:
        postgresql.withPackages (_: [
          (callPackage ./nix/pgExtension.nix { inherit postgresql extensionName; })
          (callPackage ./nix/pgsql-fio.nix { inherit postgresql; }) # only used for manual tests where writing to a file is required
        ]);
      # The wrapper script for a single PostgreSQL version.
      wrapperFor = pg:
        callPackage ./nix/pgScript.nix { postgresql = withExtensions { postgresql = pg; }; };
    in
    map wrapperFor pgVersions;

  shellHook = ''
    export HISTFILE=.history
  '';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
-- SQL-level declarations for the C functions shipped in the 'bzip' library.
-- No link symbol is given, so each function binds to the C symbol that has
-- the same name as the SQL function.

-- bzcat(data): takes a bytea and returns a bytea.
create or replace function bzcat(data bytea) returns bytea
  as 'bzip'
  language c
  immutable
  strict;

-- bzip2(data, compression_level): takes a bytea plus an optional level
-- (default 9) and returns a bytea.
create or replace function bzip2(data bytea, compression_level int default 9) returns bytea
  as 'bzip'
  language c
  immutable
  strict;
Oops, something went wrong.