Skip to content

Commit

Permalink
update mupdf
Browse files Browse the repository at this point in the history
  • Loading branch information
kjk committed Nov 3, 2023
1 parent b4c744b commit f7f3f39
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 5 deletions.
2 changes: 1 addition & 1 deletion mupdf/include/mupdf/fitz/archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ typedef struct
}
fz_archive_handler;

extern const fz_archive_handler fz_libarchive_archive_handler;
FZ_DATA extern const fz_archive_handler fz_libarchive_archive_handler;

void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler);

Expand Down
23 changes: 22 additions & 1 deletion mupdf/source/html/html-doc.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,26 @@ mobi_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *
return -1;
}

/*
	Metadata lookup for plain text documents.

	Only FZ_META_FORMAT is recognised, and is answered with "Text".
	The value is copied into buf (bounded by size) with fz_strlcpy.

	Returns the string length plus one for the terminator, the same
	convention the title branch of office_lookup_metadata uses, or
	-1 when the key is not recognised.
*/
static int
txt_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size)
{
	/* No document state is needed: the answer is a fixed string. */
	if (!strcmp(key, FZ_META_FORMAT))
		return 1 + (int)fz_strlcpy(buf, "Text", size);
	return -1;
}

/*
	Metadata lookup for office documents.

	Recognised keys:
	  FZ_META_FORMAT     -> the fixed string "Office document"
	  FZ_META_INFO_TITLE -> the title stored on the parsed HTML,
	                        when one is present

	The value is copied into buf (bounded by size) with fz_strlcpy.

	Returns the string length plus one for the terminator for a
	recognised key, or -1 when the key is unknown or the title is
	absent. Both branches use the same "length + 1" convention.
*/
static int
office_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size)
{
	html_document *doc = (html_document*)doc_;

	if (!strcmp(key, FZ_META_FORMAT))
		return 1 + (int)fz_strlcpy(buf, "Office document", size);

	/* The title is optional; fall through to -1 if it was never set. */
	if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
		return 1 + (int)fz_strlcpy(buf, doc->html->title, size);

	return -1;
}

static fz_document *
htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *zip, fz_buffer *buf, int format)
{
Expand All @@ -244,7 +264,8 @@ htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *zip, fz_buffer *buf
case FORMAT_HTML5: doc->super.lookup_metadata = htdoc_lookup_metadata; break;
case FORMAT_XHTML: doc->super.lookup_metadata = xhtdoc_lookup_metadata; break;
case FORMAT_MOBI: doc->super.lookup_metadata = mobi_lookup_metadata; break;
case FORMAT_OFFICE: doc->super.lookup_metadata = NULL; break;
case FORMAT_TXT: doc->super.lookup_metadata = txt_lookup_metadata; break;
case FORMAT_OFFICE: doc->super.lookup_metadata = office_lookup_metadata; break;
}
doc->super.is_reflowable = 1;

Expand Down
45 changes: 45 additions & 0 deletions mupdf/source/html/office.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ typedef struct

int footnotes_max;
char **footnotes;

char *title;
} doc_info;

static void
Expand Down Expand Up @@ -1037,6 +1039,35 @@ process_office_document(fz_context *ctx, fz_archive *arch, const char *file, doc
fz_rethrow(ctx);
}

/*
	Emit a <title> element for the document, taken from the
	properties part named by 'file' inside the archive.

	The part is parsed as XML, the first element named "title" is
	located by depth-first search, and its text content (if any) is
	written, escaped, to info->out wrapped in <title>...</title>.
	Nothing is written when no title text is found.

	Any exception raised while parsing or writing is rethrown after
	the parsed XML has been dropped.
*/
static void
process_office_document_properties(fz_context *ctx, fz_archive *arch, const char *file, doc_info *info)
{
	fz_xml *xml = NULL;

	/* The caller passes the result of an attribute lookup without
	 * checking it; with no part name there is nothing to read. */
	if (file == NULL)
		return;

	/* xml is assigned inside fz_try and read in fz_always, so it must
	 * be protected from being clobbered by the longjmp. */
	fz_var(xml);

	fz_try(ctx)
	{
		fz_xml *pos;
		char *title;

		xml = fz_parse_xml_archive_entry(ctx, arch, file, 1);

		pos = fz_xml_find_dfs(xml, "title", NULL, NULL);
		title = fz_xml_text(fz_xml_down(pos));
		if (title)
		{
			fz_write_string(ctx, info->out, "<title>");
			doc_escape(ctx, info->out, title);
			fz_write_string(ctx, info->out, "</title>");
		}
	}
	fz_always(ctx)
	{
		fz_drop_xml(ctx, xml);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}

fz_buffer *
fz_office_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buffer_in, const char *user_css, fz_office_to_html_opts *opts)
{
Expand All @@ -1047,6 +1078,7 @@ fz_office_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buffer_in,
fz_xml *pos = NULL;
fz_xml *rels = NULL;
const char *schema = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const char *schema_props = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties";
doc_info info = { 0 };

stream = fz_open_buffer(ctx, buffer_in);
Expand Down Expand Up @@ -1085,6 +1117,19 @@ fz_office_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buffer_in,
/* Try other types */
{
xml = try_parse_xml_archive_entry(ctx, archive, "_rels/.rels", 0);

fz_write_string(ctx, info.out, "<html>\n");

pos = fz_xml_find_dfs(xml, "Relationship", "Type", schema_props);
if (pos)
{
const char *file = fz_xml_att(pos, "Target");
fz_write_string(ctx, info.out, "<head>\n");
process_office_document_properties(ctx, archive, file, &info);
fz_write_string(ctx, info.out, "</head>\n");
}

fz_write_string(ctx, info.out, "<body>\n");
pos = fz_xml_find_dfs(xml, "Relationship", "Type", schema);
if (!pos)
fz_throw(ctx, FZ_ERROR_GENERIC, "Archive not docx.");
Expand Down
15 changes: 12 additions & 3 deletions mupdf/source/pdf/pdf-op-run.c
Original file line number Diff line number Diff line change
Expand Up @@ -1840,6 +1840,7 @@ push_marked_content(fz_context *ctx, pdf_run_processor *proc, const char *tagstr
int drop_tag = 1;
fz_structure standard;
pdf_obj *mc_dict = NULL;
int fallback = 0;

/* Flush any pending text so it's not in the wrong layer. */
pdf_flush_text(ctx, proc);
Expand Down Expand Up @@ -1873,10 +1874,18 @@ push_marked_content(fz_context *ctx, pdf_run_processor *proc, const char *tagstr

/* Structure */
if (mc_dict)
send_begin_structure(ctx, proc, mc_dict);
else
{
/* Maybe drop this entirely? */
fz_try(ctx)
send_begin_structure(ctx, proc, mc_dict);
fz_catch(ctx)
{
fz_warn(ctx, "structure tree broken, assume tree is missing: %s", fz_caught_message(ctx));
fallback = 1;
}
}

if (!mc_dict || fallback)
{
standard = structure_type(ctx, proc, tag);
if (standard != FZ_STRUCTURE_INVALID)
{
Expand Down

0 comments on commit f7f3f39

Please sign in to comment.