Skip to content

Commit

Permalink
PDFBOX-5902: provide singleton instances for heavily used Integer and…
Browse files Browse the repository at this point in the history
… byte[] values to avoid multiple instances

git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922578 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
lehmi committed Dec 18, 2024
1 parent 3fdb0cf commit 3338d0d
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 24 deletions.
29 changes: 5 additions & 24 deletions fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -332,25 +332,6 @@ private int toCIDFromRanges(byte[] code)
return 0;
}

/**
 * Combines a run of bytes from an array into a single integer, treating each byte as an
 * unsigned value and the first byte read as the most significant one.
 *
 * @param data the byte array to read from
 * @param offset index of the first byte to read
 * @param length number of consecutive bytes to read
 * @return the integer assembled from the selected bytes
 */
private int getCodeFromArray( byte[] data, int offset, int length )
{
    int result = 0;
    for (int n = 0; n < length; n++)
    {
        // shift the bytes collected so far and append the next one as an unsigned value (0..255)
        result = (result << 8) | (data[offset + n] & 0xFF);
    }
    return result;
}

/**
* This will add a character code to Unicode character sequence mapping.
*
Expand All @@ -359,15 +340,15 @@ private int getCodeFromArray( byte[] data, int offset, int length )
*/
void addCharMapping(byte[] codes, String unicode)
{
unicodeToByteCodes.put(unicode, codes.clone()); // clone needed, bytes is modified later
int code = getCodeFromArray(codes, 0, codes.length);
if (codes.length == 1)
{
charToUnicodeOneByte.put(code, unicode);
charToUnicodeOneByte.put(CMapStrings.getIndexValue(codes), unicode);
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
}
else if (codes.length == 2)
{
charToUnicodeTwoBytes.put(code, unicode);
charToUnicodeTwoBytes.put(CMapStrings.getIndexValue(codes), unicode);
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
}
else
{
Expand All @@ -376,7 +357,7 @@ else if (codes.length == 2)
// fixme: ugly little hack
if (SPACE.equals(unicode))
{
spaceMapping = code;
spaceMapping = toInt(codes);
}
}

Expand Down
42 changes: 42 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/cmap/CMapStrings.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ public class CMapStrings
// Pre-built String instances: the two-byte entries are decoded as UTF-16BE, the one-byte
// entries as ISO-8859-1 (see fillMappings).
private static final List<String> twoByteMappings = new ArrayList<>(256 * 256);
private static final List<String> oneByteMappings = new ArrayList<>(256);

// Shared Integer instances; the entry added for the byte pair (i, j) holds the value i * 256 + j.
private static final List<Integer> indexValues = new ArrayList<>(256 * 256);
// Shared byte[] instances, one per one-byte / two-byte combination. The arrays stored here are
// handed out as-is by getByteValue, so they must be treated as read-only.
private static final List<byte[]> oneByteValues = new ArrayList<>(256);
private static final List<byte[]> twoByteValues = new ArrayList<>(256 * 256);

static
{
// create all mappings when loading the class to avoid concurrency issues
Expand All @@ -48,12 +52,15 @@ private static void fillMappings()
{
byte[] bytes = { (byte) i, (byte) j };
twoByteMappings.add(new String(bytes, StandardCharsets.UTF_16BE));
twoByteValues.add(bytes);
indexValues.add((i * 256) + j);
}
}
for (int i = 0; i < 256; i++)
{
byte[] bytes = { (byte) i };
oneByteMappings.add(new String(bytes, StandardCharsets.ISO_8859_1));
oneByteValues.add(bytes);
}
}

Expand All @@ -73,4 +80,39 @@ public static String getMapping(byte[] bytes)
return bytes.length == 1 ? oneByteMappings.get(CMap.toInt(bytes))
: twoByteMappings.get(CMap.toInt(bytes));
}

/**
 * Look up the shared Integer instance for the given byte sequence, so that equal values do not
 * end up as separate Integer objects. Only sequences of at most two bytes are served from the
 * cache; any longer sequence yields null.
 *
 * @param bytes the given combination of bytes
 * @return the cached Integer for the given bytes, or null if more than two bytes are passed
 */
public static Integer getIndexValue(byte[] bytes)
{
    // both operands are reference types, so no unboxing takes place here
    return bytes.length <= 2 ? indexValues.get(CMap.toInt(bytes)) : null;
}

/**
 * Get the shared byte[] instance for the given combination of bytes to avoid multiple instances
 * for the same value. Only one-byte and two-byte sequences are cached; any other length,
 * including an empty array, produces null as return value.
 * <p>
 * The returned array is the cached instance itself, not a copy, and must not be modified by
 * the caller.
 *
 * @param bytes the given combination of bytes
 * @return the shared instance for the given combination of bytes, or null if the length of the
 * given array is neither 1 nor 2
 */
public static byte[] getByteValue(byte[] bytes)
{
    switch (bytes.length)
    {
        case 1:
            return oneByteValues.get(CMap.toInt(bytes));
        case 2:
            return twoByteValues.get(CMap.toInt(bytes));
        default:
            // an empty array must not fall through to the two-byte table, it has no cached value
            return null;
    }
}

}

0 comments on commit 3338d0d

Please sign in to comment.