Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GHA: replace markdown link checker with mdlinkcheck #527

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions .github/scripts/mdlinkcheck
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env perl
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################

# URLs that are accepted as-is: checkurl() returns success for any of these
# without running curl.
my %whitelist = map { ($_ => 1) } qw(
    https://curl.se/
    https://curl.se/changes.html
    https://curl.se/dev/advisory.html
    https://curl.se/dev/builds.html
    https://curl.se/dev/code-style.html
    https://curl.se/dev/contribute.html
    https://curl.se/dev/internals.html
    https://curl.se/dev/secprocess.html
    https://curl.se/dev/sourceactivity.html
    https://curl.se/docs/
    https://curl.se/docs/bugbounty.html
    https://curl.se/docs/caextract.html
    https://curl.se/docs/copyright.html
    https://curl.se/docs/install.html
    https://curl.se/docs/knownbugs.html
    https://curl.se/docs/manpage.html
    https://curl.se/docs/security.html
    https://curl.se/docs/sslcerts.html
    https://curl.se/docs/thanks.html
    https://curl.se/docs/todo.html
    https://curl.se/docs/vulnerabilities.html
    https://curl.se/libcurl/
    https://curl.se/libcurl/c/CURLOPT_SSLVERSION.html
    https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html
    https://curl.se/libcurl/c/CURLOPT_TLS13_CIPHERS.html
    https://curl.se/libcurl/c/libcurl.html
    https://curl.se/logo/curl-logo.svg
    https://curl.se/mail/
    https://curl.se/mail/etiquette.html
    https://curl.se/mail/list.cgi?list=curl-distros
    https://curl.se/mail/list.cgi?list=curl-library
    https://curl.se/rfc/cookie_spec.html
    https://curl.se/rfc/rfc2255.txt
    https://curl.se/sponsors.html
    https://curl.se/support.html

    https://github.com/curl/curl
    https://github.com/curl/curl-fuzzer
    https://github.com/curl/curl-www
    https://github.com/curl/curl/discussions
    https://github.com/curl/curl/issues
    https://github.com/curl/curl/labels/help%20wanted
    https://github.com/curl/curl/pulls
);

# ask git for all .md files in the repo; each element is one line of the
# command output with its newline still attached
my @files = qx(git ls-files '**.md');

# Remember one link target found in a markdown file.
#
# Arguments: the file the link was found in, the line number within that
# file and the raw link target.
#
# Links starting with '#' are dropped. http(s) targets are added to %url,
# everything else is resolved relative to the linking file's directory and
# added to %flink. Both tables map the target to a space-terminated list of
# "file:line" positions.
sub storelink {
    my ($file, $lineno, $target) = @_;

    # where the link was seen, in the form stored in the tables
    my $origin = "$file:$lineno ";

    # a link to an anchor within the same document: nothing to verify
    return if($target =~ /^\#/);

    # strip off an anchor trailing the actual target
    $target =~ s:\#.*\z::;

    if($target =~ /^(https|http):/) {
        # remote link, kept for the URL check
        $url{$target} .= $origin;
        return;
    }

    # anything else is taken as a path relative to the linking file, so
    # begin in that file's directory
    my $base = $file;
    $base =~ s:([^/]*\z)::;

    # every leading "../" in the link moves one directory level up
    while($target =~ s:^\.\.\/::) {
        $base =~ s:([^/]*)\/\z::;
    }

    $flink{"./$base$target"} .= $origin;
}

# Scan one markdown file and pass every inline link target, i.e. the part
# following "](" up to the next ")", to storelink() together with the file
# name and the line number it was found on.
#
# A file that cannot be opened is skipped without any message.
sub findlinks {
    my ($f) = @_;
    my $line = 1;

    open(my $fh, "<:crlf", $f) ||
        return;

    while(my $text = <$fh>) {
        # a line can hold more than one link, so keep matching until the
        # line is exhausted instead of stopping at the first hit
        while($text =~ /\]\(([^)]*)/g) {
            my $link = $1;
            #print "$f:$line $link\n";
            storelink($f, $line, $link);
        }
        $line++;
    }
    close($fh);
}

# Check that a URL can be fetched.
#
# URLs present in %whitelist are accepted right away. Any other URL is probed
# with curl: a HEAD request (-I) that follows redirects (-L), fails on HTTP
# errors (-f), stays silent (-s), is limited to 10 seconds (-m10) and is
# retried twice with a 5 second delay.
#
# Returns 0 if the URL is fine and 1 if curl produced no output for it.
sub checkurl {
    my ($url) = @_;

    if($whitelist{$url}) {
        #print "$url is whitelisted\n";
        return 0;
    }

    print "check $url\n";

    # The URL is taken from markdown files, so pass it to curl as a separate
    # argument. Going through a shell command line would let quotes,
    # backticks or dollar signs in the URL be interpreted by the shell.
    my @curlcmd = ('curl', '-ILfsm10', '--retry', '2', '--retry-delay', '5',
                   '-A', 'Mozilla/curl.se link-probe', $url);
    my @content;
    if(open(my $pipe, '-|', @curlcmd)) {
        @content = <$pipe>;
        close($pipe);
    }

    # not a single line of output: curl could not get the URL (or could not
    # be started at all)
    if(!$content[0]) {
        print STDERR "FAIL\n";
        return 1; # fail
    }
    return 0; # ok
}

# collect the links of every markdown file
for my $f (@files) {
    chomp $f;
    findlinks($f);
}

# number of broken links found; a non-zero count makes the script exit 1
my $error = 0;

# probe each distinct URL once, then report every position linking to it
for my $u (sort keys %url) {
    my $r = checkurl($u);

    if($r) {
        # $url{$u} is a space-separated list of "file:line" positions
        for my $f (split(/ /, $url{$u})) {
            printf "%s ERROR links to missing URL %s\n", $f, $u;
            $error++;
        }
    }
}

# links to local files must point at something readable
for my $file (sort keys %flink) {
    if(! -r $file) {
        for my $f (split(/ /, $flink{$file})) {
            printf "%s ERROR links to missing file %s\n", $f, $file;
            $error++;
        }
    }
}

exit 1 if($error);
13 changes: 7 additions & 6 deletions .github/workflows/linkcheck.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Docs: https://github.com/marketplace/actions/markdown-link-check

name: Markdown links

on:
Expand All @@ -17,7 +15,10 @@ jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- uses: gaurav-nelson/github-action-markdown-link-check@v1
with:
use-quiet-mode: 'yes'
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
with:
persist-credentials: false
name: checkout

- name: Run mdlinkcheck
run: ./.github/scripts/mdlinkcheck
Loading