Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the definition of 'analyzer' in Semgrep rules #286

Merged
merged 9 commits into from
Sep 9, 2024
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ clean:
# This takes a while but ensures we use the correct versions of the atd tools.
.PHONY: setup
setup:
# Please install check-jsonschema (Python tool) if this fails:
check-jsonschema --version
opam update
opam install --deps-only .

Expand Down
39 changes: 34 additions & 5 deletions rule_schema_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,38 @@ $defs:
oneOf:
- required: [ http ]
- required: [ aws ]
# Analyzer used to inspect a metavariable.
# Two notations are accepted:
# - short form: the analyzer name as a plain string;
# - long form: an object whose 'kind' property holds the analyzer name.
# Only the long form of 'entropy_v2' accepts an extra property ('mode').
analyzer:
title: Analyzer to use
oneOf:
# Short forms
- const: entropy
- const: entropy_v2
- const: redos
# Long form of 'entropy': no property other than 'kind' is allowed
- type: object
properties:
kind:
const: entropy
required:
- kind
additionalProperties: false
# Long form of 'entropy_v2': 'mode' is optional since only 'kind' is required
- type: object
properties:
kind:
const: entropy_v2
mode:
oneOf:
- const: lax
- const: strict
- const: default
required:
- kind
additionalProperties: false
# Long form of 'redos': no property other than 'kind' is allowed
- type: object
properties:
kind:
const: redos
required:
- kind
additionalProperties: false
# EXPERIMENTAL
aws-request-content:
properties:
Expand Down Expand Up @@ -227,7 +259,7 @@ $defs:
metavariable:
type: string
analyzer:
type: string
$ref: "#/$defs/analyzer"
general-pattern-content:
title: "Return finding where code matches against the following pattern"
oneOf:
Expand Down Expand Up @@ -482,13 +514,10 @@ $defs:
title: Inspect a metavariable with a given analyzer
properties:
analyzer:
type: string
title: Analyzer to use
$ref: "#/$defs/analyzer"
metavariable:
type: string
title: Metavariable to analyze
options:
type: object
required:
- analyzer
- metavariable
Expand Down
9 changes: 8 additions & 1 deletion rule_schema_v2.atd
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,16 @@ type mvar = string

mjambon marked this conversation as resolved.
Show resolved Hide resolved
type analyzer = [
| Entropy <json name="entropy">
| EntropyV2 <json name="entropy_v2">
| EntropyV2 <json name="entropy_v2"> of entropy_analysis_mode
| Redos <json name="redos">
]
<json adapter.ocaml="Rule_schema_v2_adapter.Analyzer">

(* Analysis mode that parameterizes the 'EntropyV2' analyzer.

   NOTE(review): Rule_schema_v2_adapter.Analyzer normalizes the long form
   {kind: entropy_v2, mode: strict} into ["entropy_v2", {"mode": "strict"}],
   i.e. the constructor argument is a JSON object, whereas this type is a
   plain sum type. Confirm whether the argument of 'EntropyV2' should be
   a record with an optional 'mode' field instead. *)
type entropy_analysis_mode = [
| Lax <json name="lax">
| Default <json name="default">
| Strict <json name="strict">
]

(* --------------------------- *)
(* Focus condition *)
Expand Down
94 changes: 94 additions & 0 deletions rule_schema_v2_adapter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,97 @@ module ProjectDependsOn = struct
let restore (_atd : Yojson.Safe.t) : Yojson.Safe.t =
failwith "Rule_schema_v2_adapter.ProjectDependsOn.restore not implemented"
end

(* This is the name of the field that contains the variant constructor
   in the user-friendly YAML convention we use to represent variants.
   'normalize_variant' looks this field up in the input object and
   'restore_variant' adds it back to the output object. *)
let kind_field_name = "kind"

(*
A generic representation for variants. The parameters, if any, must be
an ATD record (JSON object, Yojson assoc).

type t = [
| A <json name="a">
| B <json name="b"> of b
]

type b = {
(* all the fields are optional *)
?k: int option;
}

1. OCaml A is represented as JSON "A". The adapter doesn't change it.
2. OCaml B {k = 42} is represented as JSON {"kind": "B", "k": 42}
which the adapter converts to JSON ["B", {"k": 42}].

Additionally, the alternate notations {"kind": "A"} and "B" can be
supported in addition to "A" and {"kind": "B"}. This requires specifying
the constructors for which the alternate notation is supported.
Constructors that don't expect an argument must be listed as 'enum'.
Constructors that expect an object argument must be listed as 'obj'.
This gives us the following call:

normalize_variant ~enum:["a"] ~obj:["b"] json

Without specifying 'enum' or 'obj', YAML/JSON interpretation will be
stricter by not tolerating the alternate notations {"kind": "A"} or "B".

YAML example:

- a

- kind: b
k: 42

# assuming default properties:
- kind: b

# shorthand for {kind: b}:
- b

# long form for "a":
- kind: a

TODO: make the ATD tools (atdgen, atdpy, ...) support these alternate
formats as well?
This would allow us to make adapters generic i.e. without
having to specify the 'enum' and 'obj' options. In the example above,
atdgen would read "b" as ["b", {}] and would read ["a", {}] or ["a", null]
as "a" without complaining.
*)
let normalize_variant
    ?(enum = [])
    ?(obj = [])
    (orig : Yojson.Safe.t) : Yojson.Safe.t =
  let is_kind_field (name, _value) = name = kind_field_name in
  match orig with
  | `String name ->
      (* Shorthand "b" for a constructor that expects an object argument. *)
      if List.mem name obj then `List [ `String name; `Assoc [] ] else orig
  | `Assoc fields -> (
      let kind_fields, params = List.partition is_kind_field fields in
      match kind_fields with
      | [ (_, `String name) ] -> (
          match params with
          | [] when List.mem name enum ->
              (* Long form {"kind": "a"} of an argument-less constructor. *)
              `String name
          | _ -> `List [ `String name; `Assoc params ])
      | _ ->
          (* Missing, duplicate or non-string 'kind': leave the input as is. *)
          orig)
  | _ -> orig

(* Unlike 'normalize_variant', this is fully generic
   (because we're going from a strict format to a looser format).

   A pair [constructor, {fields}] becomes a single object holding the
   constructor under 'kind_field_name' followed by the fields. Any other
   value, including a plain string, is returned unchanged. *)
let restore_variant : Yojson.Safe.t -> Yojson.Safe.t = function
  | `List [ (`String _ as kind); `Assoc params ] ->
      `Assoc ((kind_field_name, kind) :: params)
  | other -> other

(* JSON adapter for the 'analyzer' variant. *)
module Analyzer = struct
  (* "entropy" and "redos" take no argument; "entropy_v2" takes an object.
     Both the short and the long notation are tolerated for each of them. *)
  let normalize json =
    let enum = [ "entropy"; "redos" ] in
    let obj = [ "entropy_v2" ] in
    normalize_variant ~enum ~obj json

  let restore (atd : Yojson.Safe.t) : Yojson.Safe.t = atd |> restore_variant
end
1 change: 1 addition & 0 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.PHONY: test
test:
./test-ast
$(MAKE) -C jsonschema test

.PHONY: clean
clean:
Expand Down
4 changes: 4 additions & 0 deletions tests/jsonschema/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Validate sample YAML data against JSON Schemas
#
# './validate SCHEMA_FILE INPUT_DIR' checks the *.ok.yaml and *.fail.yaml
# files found in INPUT_DIR (here: 'rules') against SCHEMA_FILE.
.PHONY: test
test:
./validate ../../rule_schema_v1.yaml rules
10 changes: 10 additions & 0 deletions tests/jsonschema/rules/entropy-misspelled.fail.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer: entropt
message: Semgrep found a match
languages: [python]
severity: WARNING
13 changes: 13 additions & 0 deletions tests/jsonschema/rules/entropy-mode.fail.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer:
kind: entropy
# illegal option for 'entropy':
mode: strict
message: Semgrep found a match
languages: [python]
severity: WARNING
10 changes: 10 additions & 0 deletions tests/jsonschema/rules/entropy-short.ok.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer: entropy
message: Semgrep found a match
languages: [python]
severity: WARNING
12 changes: 12 additions & 0 deletions tests/jsonschema/rules/entropy_v2-invalid-mode.fail.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer:
kind: entropy_v2
mode: badass
message: Semgrep found a match
languages: [python]
severity: WARNING
12 changes: 12 additions & 0 deletions tests/jsonschema/rules/entropy_v2-mode.ok.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer:
kind: entropy_v2
mode: strict
message: Semgrep found a match
languages: [python]
severity: WARNING
11 changes: 11 additions & 0 deletions tests/jsonschema/rules/entropy_v2-no-mode.ok.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer:
kind: entropy_v2
message: Semgrep found a match
languages: [python]
severity: WARNING
10 changes: 10 additions & 0 deletions tests/jsonschema/rules/entropy_v2-short.ok.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
rules:
- id: test-entropy
patterns:
- pattern: "$STRING"
- metavariable-analysis:
metavariable: $STRING
analyzer: entropy_v2
message: Semgrep found a match
languages: [python]
severity: WARNING
33 changes: 33 additions & 0 deletions tests/jsonschema/validate
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /usr/bin/env bash
#
# Use a JSON Schema validator to check input files that should pass or fail.
#
# Usage: validate SCHEMA_FILE INPUT_DIR
#
# A file with the .ok.yaml extension is expected to pass validation.
# A file with the .fail.yaml extension is expected to fail.
#
# The script exits with a nonzero status if an .ok.yaml file is rejected
# (the command fails under 'set -e') or if a .fail.yaml file is accepted.
#
set -eu

schema_file=$1
input_dir=$2

# Check that check-jsonschema is installed
# Installation instructions:
# https://github.com/python-jsonschema/check-jsonschema
check-jsonschema --version

# Check well-formed files
check-jsonschema --schemafile "$schema_file" "$input_dir"/*.ok.yaml

exit_code=0

# Check that malformed files are detected
for input_file in "$input_dir"/*.fail.yaml; do
  # When no file matches, bash leaves the glob pattern unexpanded. Skip it:
  # otherwise the validator's failure on that nonexistent path would be
  # reported as an expected failure.
  [[ -e "$input_file" ]] || continue
  if check-jsonschema --schemafile "$schema_file" "$input_file"; then
    echo "*** $input_file: should have failed validation" >&2
    exit_code=1
  else
    echo "XFAIL (failed as expected)"
  fi
done

exit "$exit_code"