Skip to content

Commit

Permalink
Merge branch 'release/2.0.0-RC3'
Browse files Browse the repository at this point in the history
  • Loading branch information
Aklakan committed May 24, 2024
2 parents 24ad68d + 1e60d0c commit 5978189
Show file tree
Hide file tree
Showing 74 changed files with 1,766 additions and 834 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
.settings
target

*.jar

pom.xml.versionBackup
pom.xml.versionsBackup

Expand Down
44 changes: 23 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,35 +1,37 @@
FROM maven:3-jdk-11 as build
# IMPORTANT: This dockerfile is merely a fallback!
# The recommended way to build a docker image from this java project is using the jib maven plugin
# on the module lsq-pkg-docker-cli:
# mvn clean install
# mvn -pl :lsq-pkg-docker-cli jib:dockerBuild

# Building this Dockerfile requires buildkit:
# Ensure { "features": { "buildkit": true } } exists in /etc/docker/daemon.json
# (or wherever your daemon.json resides)

COPY . .
RUN mvn -Pdist,standalone clean install
ARG home="/lsq"

FROM maven:3-jdk-11 as build
ARG home
ENV HOME "$home"
RUN mkdir -p "$HOME"
WORKDIR "$HOME"
ADD . "$HOME"
RUN --mount=type=cache,target=/root/.m2 mvn -Pdist,standalone clean install

# Final running image
FROM openjdk:11-jre-slim

ARG home
ENV HOME "$home"
# Import the lsq-cli jar from the build step
COPY --from=build lsq-cli/target/lsq-cli-*-jar-with-dependencies.jar /app/lsq-cli.jar

RUN apt-get update && \
apt-get install -y wget

# # Install Spark for standalone context in /opt
# ENV APACHE_SPARK_VERSION=3.2.0
# ENV HADOOP_VERSION=3.2
# ENV SPARK_HOME=/opt/spark
# ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info"
# RUN wget -q -O spark.tgz https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
# tar xzf spark.tgz -C /opt && \
# rm "spark.tgz" && \
# ln -s "/opt/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" $SPARK_HOME

COPY --from=build "$HOME/lsq-pkg-parent/lsq-pkg-uberjar-cli/target/"lsq-pkg-uberjar-cli-*-jar-with-dependencies.jar "$HOME/lsq-cli.jar"

# Using /data as working directory that will be shared with host for input/output files
WORKDIR /data
VOLUME [ "/data" ]

ENTRYPOINT ["java","-jar","/app/lsq-cli.jar"]
# Exec-form ENTRYPOINT performs no variable expansion, so "$HOME/lsq-cli.jar" would reach java literally.
# Run through a shell that resolves $HOME at container start; "exec" replaces the shell with java
# and "$@" forwards the CMD / docker-run arguments ("--" fills the shell's $0 slot).
ENTRYPOINT ["sh","-c","exec java -jar \"$HOME/lsq-cli.jar\" \"$@\"","--"]
CMD ["-h"]

# Usage:
# docker run -it -v $(pwd):/data ghcr.io/aksw/lsq rx rdfize --endpoint=http://dbpedia.org/sparql virtuoso.dbpedia.log
# docker run -it -v $(pwd):/data ghcr.io/aksw/lsq rx rdfize --endpoint=http://dbpedia.org/sparql virtuoso.dbpedia.log

64 changes: 64 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Recipes run in bash with strict flags:
#  * release-bundle relies on the bash-only ${var/pattern/replacement} expansion.
#  * With .ONESHELL only the exit status of the last recipe line counts unless -e is set,
#    so without it a failing mvn call would be masked by a later, succeeding command.
SHELL := bash
.SHELLFLAGS := -eu -o pipefail -c

# Absolute path of the directory make runs in (built-in; avoids forking `pwd` on every expansion).
CWD := $(CURDIR)

POM = -f pom.xml
# Base maven invocation; skips tests, javadoc, scaladoc, etc.
MS = mvn -DskipTests -Dmaven.javadoc.skip=true -Dskip
# Maven Clean Install Skip
MCIS = $(MS) clean install
# Maven Clean Compile Skip
MCCS = $(MS) clean compile

# Must stay recursively expanded (=): the error only fires when a recipe actually expands $(VER).
VER = $(error specify VER=releasefile-name e.g. VER=1.9.7-rc2)
# $(call loud,CMD): print CMD prefixed with "@@", then run it.
loud = echo "@@" $(1);$(1)

# None of the targets below produce a file of the same name.
.PHONY: help distjar rpm-rebuild rpm-reinstall rpm-rere deb-rebuild deb-reinstall deb-rere docker release-bundle

# Every recipe is passed to a single shell, so shell variables survive across recipe lines.
.ONESHELL:

# Source: https://stackoverflow.com/questions/4219255/how-do-you-get-the-list-of-targets-in-a-makefile
help: ## Show these help instructions
	@sed -rn 's/^([a-zA-Z_-]+):.*?## (.*)$$/"\1" "\2"/p' < $(MAKEFILE_LIST) | xargs printf "make %-20s# %s\n"

distjar: ## Create only the standalone jar-with-dependencies of lsq
	$(MCCS) $(POM) package -Pstandalone,dist -pl :lsq-pkg-uberjar-cli -am $(ARGS)
	file=`find '$(CWD)/lsq-pkg-parent/lsq-pkg-uberjar-cli/target' -name '*-jar-with-dependencies.jar'`
	printf '\nCreated package:\n\n%s\n\n' "$$file"

rpm-rebuild: ## Rebuild the rpm package (minimal build of only required modules)
	$(MCIS) $(POM) -Prpm -am -pl :lsq-pkg-rpm-cli $(ARGS)

rpm-reinstall: ## Reinstall rpm (requires prior build)
	@p1=`find lsq-pkg-parent/lsq-pkg-rpm-cli/target | grep '\.rpm$$'`
	sudo rpm -U "$$p1"

rpm-rere: rpm-rebuild rpm-reinstall ## Rebuild and reinstall rpm package


deb-rebuild: ## Rebuild the deb package (minimal build of only required modules)
	$(MCIS) $(POM) -Pdeb -am -pl :lsq-pkg-deb-cli $(ARGS)

deb-reinstall: ## Reinstall deb (requires prior build)
	@p1=`find lsq-pkg-parent/lsq-pkg-deb-cli/target | grep '\.deb$$'`
	sudo dpkg -i "$$p1"

deb-rere: deb-rebuild deb-reinstall ## Rebuild and reinstall deb package


docker: ## Build Docker image
	$(MCIS) $(POM) -am -pl :lsq-pkg-docker-cli $(ARGS)
	cd lsq-pkg-parent/lsq-pkg-docker-cli && $(MS) jib:dockerBuild

# NOTE(review): the artifact names ("rpt-*") and the image name ("aksw/rpt") look like leftovers
# from another project's Makefile; confirm the intended names for lsq before releasing.
release-bundle: ## Create files for Github upload
	@ver=$(VER)
	$(call loud,$(MAKE) deb-rebuild)
	p1=`find lsq-pkg-parent/lsq-pkg-deb-cli/target | grep '\.deb$$'`
	$(call loud,cp "$$p1" "rpt-$${ver/-/\~}.deb")
	$(call loud,$(MAKE) rpm-rebuild)
	p1=`find lsq-pkg-parent/lsq-pkg-rpm-cli/target | grep '\.rpm$$'`
	$(call loud,cp "$$p1" "rpt-$$ver.rpm")
	$(call loud,$(MAKE) distjar)
	file=`find '$(CWD)/lsq-pkg-parent/lsq-pkg-uberjar-cli/target' -name '*-jar-with-dependencies.jar'`
	$(call loud,cp "$$file" "rpt-$$ver.jar")
	$(call loud,$(MAKE) docker)
	$(call loud,docker tag aksw/rpt aksw/rpt:$$ver)
	$(call loud,gh release create v$$ver "rpt-$${ver/-/\~}.deb" "rpt-$$ver.rpm" "rpt-$$ver.jar")
	$(call loud,docker push aksw/rpt:$$ver)
	$(call loud,docker push aksw/rpt)
30 changes: 30 additions & 0 deletions docker-from-uberjar/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# IMPORTANT: This dockerfile is merely a fallback!
# The recommended way to build a docker image from this java project is using the jib maven plugin
# on the module lsq-pkg-docker-cli:
# mvn clean install
# mvn -pl :lsq-pkg-docker-cli jib:dockerBuild

# Building this Dockerfile requires buildkit:
# Ensure { "features": { "buildkit": true } } exists in /etc/docker/daemon.json
# (or wherever your daemon.json resides)

ARG home="/lsq"

# Final running image
FROM openjdk:11-jre-slim
ARG home
ENV HOME "$home"
# Add the prebuilt lsq uberjar from the build context (this directory)
ADD lsq-pkg-uberjar-cli-*-jar-with-dependencies.jar "$HOME/lsq-cli.jar"

# Using /data as working directory that will be shared with host for input/output files
WORKDIR /data
VOLUME [ "/data" ]

# Exec-form ENTRYPOINT performs no variable expansion, so the jar path is resolved by a shell at
# container start. This keeps it in sync with the "home" build argument instead of hard-coding /lsq.
# "exec" replaces the shell with java; "$@" forwards the CMD / docker-run arguments.
ENTRYPOINT ["sh","-c","exec java -jar \"$HOME/lsq-cli.jar\" \"$@\"","--"]
CMD ["-h"]

# Usage:
# docker run -it -v $(pwd):/data ghcr.io/aksw/lsq rx rdfize --endpoint=http://dbpedia.org/sparql virtuoso.dbpedia.log

10 changes: 10 additions & 0 deletions docker-from-uberjar/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Helper docker file to build the image from an uberjar and thus avoid a full build.

For example, this git repository might be checked out on a remote server with only docker available.
In this case, the lsq jar could be built locally and copied to the remote server using `scp`.

The filename must match the pattern: `lsq-pkg-uberjar-cli-*-jar-with-dependencies.jar`
This is also the file generated when running `make distjar` on the project root.

Once the uberjar is present, run `docker build .` to build the image.

6 changes: 6 additions & 0 deletions docker-from-uberjar/copy-uberjar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Copies the lsq uberjar from the maven target folder next to this script,
# where the Dockerfile in this directory picks it up.
#
# In the project root, run "make distjar" to build the uberjar

# Abort on the first error (e.g. when the uberjar has not been built yet).
set -euo pipefail

# Resolve all paths relative to this script so it works from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

cp "$script_dir"/../lsq-pkg-parent/lsq-pkg-uberjar-cli/target/lsq-pkg-uberjar-cli-*-jar-with-dependencies.jar "$script_dir"/

2 changes: 1 addition & 1 deletion docs/v2/concepts/data-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ nav_order: 1

**This is the data model of version 2 of LSQ**

![Depiction of the LSQ2 Data Model](https://github.com/AKSW/LSQ/blob/develop/docs/v2/images/lsq2-datamodel.png)
![Depiction of the LSQ2 Data Model](https://raw.githubusercontent.com/AKSW/LSQ/develop/docs/v2/images/lsq2-datamodel.png)
57 changes: 55 additions & 2 deletions docs/v2/usage/useful-queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,8 @@ SELECT ?exp ?bgpLabel ?bgpNodeLabel ?subBgpLabel ?subTpLabel ?bgpSize ?subTpSiz
?localExec lsqv:benchmarkRun ?exp .
# Get the measurements
?subBgpExec lsqv:hasElementExec [ lsqv:itemCount ?bgpSize ] .
?subTpExec lsqv:hasElementExec [ lsqv:itemCount ?subTpSize ] .
?subBgpExec lsqv:hasElementExec [ lsqv:resultCount ?bgpSize ] .
?subTpExec lsqv:hasElementExec [ lsqv:resultCount ?subTpSize ] .
?subTpInBgpExec lsqv:tpToBgpRatio ?subTpToBgpRatio .
Expand All @@ -324,3 +324,56 @@ SELECT ?exp ?bgpLabel ?bgpNodeLabel ?subBgpLabel ?subTpLabel ?bgpSize ?subTpSiz
| lsqr:xc-dbpedia.org-somedata_2020-07-25_at_05-08-2020_02:24:05 | "?obj a swc:SessionEvent ;\n ?prop ?target" | "?target" | "?obj ?prop ?target" | "?obj ?prop ?target" | 900 | 900 | 1 |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```


#### Find Sparse Join Candidates among BGPs
```sparql
# Find Sparse join candidates: Search for basic graph patterns that have significantly
# fewer results than the smallest result set among its triple patterns.
PREFIX lsqv: <http://lsq.aksw.org/vocab#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?exp ?text ?bgpLabel ?tpLabel ?bgpSize ?tpSize ?bgpTpSizeRatio {
{ SELECT * { # Comment out this SELECT block to run this query on all data
{
?query lsqv:hasLocalExec ?localExec .
?localExec lsqv:hasBgpExec ?bgpExec .
# Links from the executions to the query's elements
?bgp lsqv:hasExec ?bgpExec ; rdfs:label ?bgpLabel .
?bgpExec lsqv:hasElementExec [ lsqv:resultCount ?bgpSize ] .
# Discard bgps with empty results
FILTER(?bgpSize > 0)
}
LATERAL {
# For the current bgpExec, get the tp with the smallest result set size
SELECT ?bgpExec ?tpExec ?tpSize {
?bgpExec lsqv:hasTpInBgpExec ?tpInBgpExec .
?tpInBgpExec lsqv:hasTpExec ?tpExec .
?tpExec lsqv:hasElementExec [ lsqv:resultCount ?tpSize ] .
} ORDER BY ASC(?tpSize) LIMIT 1
}
# Compute the ratio of the bgp size vs smallest tp size
BIND(?bgpSize / ?tpSize AS ?bgpTpSizeRatio)
?tp lsqv:hasExec ?tpExec ; rdfs:label ?tpLabel .
?localExec lsqv:benchmarkRun ?exp .
?query lsqv:text ?text
} LIMIT 1000 }
}
ORDER BY ASC(?bgpTpSizeRatio)
```

```
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| exp | bgpLabel | tpLabel | bgpSize | tpSize | bgpTpSizeRatio |
===========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
| <http://lsq.aksw.org/xc-dbpedia_2020-10-10_at_10-10-2020_18:29:19> | "?id a <http://dbpedia.org/ontology/Film> ;\n <http://dbpedia.org/property/title> ?ft ;\n <http://dbpedia.org/ontology/imdbId> ?imdb_id" | "?id <http://dbpedia.org/ontology/imdbId> ?imdb_id" | "23"^^<http://www.w3.org/2001/XMLSchema#long> | "70876"^^<http://www.w3.org/2001/XMLSchema#long> | 0.000324510412551498391557 |
| <http://lsq.aksw.org/xc-dbpedia_2020-10-10_at_10-10-2020_18:29:19> | "?artist a <http://dbpedia.org/ontology/MusicalArtist> ;\n <http://www.w3.org/2000/01/rdf-schema#label> ?name ;\n <http://dbpedia.org/property/genre> <http://dbpedia.org/resource/Acid_rock> ;\n <http://dbpedia.org/property/genre> <http://dbpedia.org/resource/Funk_rock> ;\n <http://dbpedia.org/property/genre> <http://dbpedia.org/resource/Psychedelic_rock>" | "?artist <http://dbpedia.org/property/genre> <http://dbpedia.org/resource/Acid_rock>" | "1"^^<http://www.w3.org/2001/XMLSchema#long> | "356"^^<http://www.w3.org/2001/XMLSchema#long> | 0.002808988764044943820225 |
| <http://lsq.aksw.org/xc-dbpedia_2020-10-10_at_10-10-2020_18:29:19> | "?author a <http://dbpedia.org/ontology/Writer> .\n?film <http://dbpedia.org/ontology/writer> ?author .\n?actor <http://dbpedia.org/property/starring> ?film .\n?author <http://www.w3.org/2002/07/owl#sameAs> ?nytId" | "?author <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Writer>" | "116"^^<http://www.w3.org/2001/XMLSchema#long> | "30649"^^<http://www.w3.org/2001/XMLSchema#long> | 0.003784789063264706841985 |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```
2 changes: 1 addition & 1 deletion lsq-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<parent>
<groupId>org.aksw.simba.lsq</groupId>
<artifactId>lsq-parent</artifactId>
<version>2.0.0-RC2</version>
<version>2.0.0-RC3</version>
</parent>

<!-- scm section needs to be duplicated on child module for github-release-plugin;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package org.aksw.simba.lsq.cli.cmd.base;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.aksw.simba.lsq.cli.main.MainCliLsq;
import org.aksw.simba.lsq.enricher.core.LsqEnricherRegistry;

import picocli.CommandLine.ArgGroup;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

/**
 * Base class holding the picocli options of an LSQ "analyze" command:
 * a help flag, an optional enricher selection and the positional arguments.
 */
public class CmdLsqAnalyzeBase {

    /** Usage-help flag ({@code -h} / {@code --help}). */
    @Option(names = { "-h", "--help" }, usageHelp = true)
    public boolean help = false;

    /**
     * Optional enricher selection. The group is declared exclusive with multiplicity
     * {@code 0..1}, i.e. at most one of {@code --only} / {@code --exclude} is accepted.
     */
    @ArgGroup(exclusive = true, multiplicity = "0..1")
    public EnricherSpec enricherSpec = new EnricherSpec();

    /** Carries either a list of enrichers to include or a list of enrichers to exclude. */
    public static class EnricherSpec {

        /** Values given via {@code --only}; stays {@code null} when the option is absent. */
        @Option(names = { "--only" }, completionCandidates = CompletionCandidatesEnrichers.class)
        public List<String> inclusions = null;

        /** Values given via {@code --exclude}; stays {@code null} when the option is absent. */
        @Option(names = { "--exclude" }, completionCandidates = CompletionCandidatesEnrichers.class)
        public List<String> exclusions = null;

        /** @return {@code true} iff an inclusion list ({@code --only}) is present */
        public boolean isWhitelist() {
            return inclusions != null;
        }

        /**
         * @return the inclusion list if present, otherwise the exclusion list if present,
         *         otherwise an empty immutable list
         */
        public List<String> getRawList() {
            if (inclusions != null) {
                return inclusions;
            }
            if (exclusions != null) {
                return exclusions;
            }
            return List.of();
        }

        /**
         * Delegates to {@code MainCliLsq.effectiveList}, passing the raw list, the whitelist flag
         * and a fresh copy of the keys registered in {@link LsqEnricherRegistry}.
         *
         * @return whatever {@code MainCliLsq.effectiveList} computes from these inputs
         */
        public List<String> getEffectiveList() {
            return MainCliLsq.effectiveList(
                    getRawList(),
                    isWhitelist(),
                    new ArrayList<>(LsqEnricherRegistry.get().getKeys()));
        }
    }

    /** Completion candidates for the enricher options: the keys of the enricher registry. */
    public static class CompletionCandidatesEnrichers
        implements Iterable<String> {

        @Override
        public Iterator<String> iterator() {
            return LsqEnricherRegistry.get().getKeys().iterator();
        }
    }

    /** Positional arguments; at least one is required (arity {@code 1..*}). */
    @Parameters(arity = "1..*", description = "file-list to probe")
    public List<String> nonOptionArgs = new ArrayList<>();
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import org.aksw.simba.lsq.core.LsqRdfizeSpec;

import picocli.CommandLine.ArgGroup;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

Expand Down Expand Up @@ -36,8 +37,22 @@ public class CmdLsqRdfizeBase
@Option(names={"-s", "--slim"}, description="Slim output only retains query, hostname, timestamp and sequence id")
public boolean slimMode = false;

@Option(names={"-e", "--endpoint"}, required=true, description="Service endpoint for which the logs were generated")
public String endpointUrl = null;

@ArgGroup(exclusive = true, multiplicity = "1")
public RdfizationLevel rdfizationLevel = new RdfizationLevel();

public static class RdfizationLevel {
@Option(names={"--query-only"}, description="Only RDFize the query. Do not track its occurrence.")
public boolean queryOnly = false;

// Endpoint is not needed if --query-only is specified
@Option(names={"-e", "--endpoint"}, required=true, description="Service endpoint for which the logs were generated")
public String endpointUrl = null;
}

@Option(names = { "-d", "--used-prefixes" }, description = "Number of records (bindings/quads) by which to defer RDF output in order to analyze used prefixes; default: ${DEFAULT-VALUE}", defaultValue = "100")
public long usedPrefixDefer;


@Parameters(arity="1..*", description="log sources")
public List<String> nonOptionArgs = new ArrayList<>();
Expand Down Expand Up @@ -98,13 +113,18 @@ public String getHostSalt() {
}

@Override
public boolean isSlimMode() {
return slimMode;
public String getEndpointUrl() {
return rdfizationLevel.endpointUrl;
}

@Override
public String getEndpointUrl() {
return endpointUrl;
public boolean isQueryOnly() {
return rdfizationLevel.queryOnly;
}

@Override
public boolean isSlimMode() {
return slimMode;
}

@Override
Expand Down
Loading

0 comments on commit 5978189

Please sign in to comment.