No OneTemporary
Actions

Authored By

Unknown

Size

24 KB

Referenced Files

None

Subscribers

None

View Options

	diff --git a/src/UTF8Functions.cpp b/src/UTF8Functions.cpp
	index 80e5717..2c34b61 100644
	--- a/src/UTF8Functions.cpp
	+++ b/src/UTF8Functions.cpp
	@@ -1,471 +1,512 @@
	/*
	* Copyright (C) 2019 Me and My Shadow
	*
	* This file is part of Me and My Shadow.
	*
	* Me and My Shadow is free software: you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation, either version 3 of the License, or
	* (at your option) any later version.
	*
	* Me and My Shadow is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with Me and My Shadow. If not, see <http://www.gnu.org/licenses/>.
	*/

	#include <stdio.h>
	#include <math.h>
	#include <string.h>
	#include <algorithm>
	#include <string>
	#include "UTF8Functions.h"

	// Decodes one UTF-8 sequence from a NUL-terminated string.
	// s: the string
	// p [in,out]: byte offset of the sequence to decode; advanced past the bytes consumed
	// return value: the decoded character in UTF-32 format, 0 at the end of the string
	// (p is left on the terminating NUL), -1 on error.
	// Error positions: a run of stray continuation bytes is skipped as a whole; when a
	// continuation byte is missing, p is left on the offending byte; a lead byte >= 0xF8
	// is skipped by itself.
	int utf8ReadForward(const char* s, int& p) {
		int ch = static_cast<unsigned char>(s[p]);

		if (ch < 0x80) {
			// ASCII. Do not step over the terminating NUL.
			if (ch) p++;
			return ch;
		}

		if (ch < 0xC0) {
			// Stray continuation byte(s): skip the whole run and report one error.
			while ((static_cast<unsigned char>(s[p]) & 0xC0) == 0x80) p++;
			return -1;
		}

		// The NUL terminator is never a continuation byte, so each check below
		// stops before reading past the end of the string.
		if (ch < 0xE0) {
			// Two-byte sequence.
			const int c2 = static_cast<unsigned char>(s[++p]);
			if ((c2 & 0xC0) != 0x80) return -1;

			p++;
			return ((ch & 0x1F) << 6) | (c2 & 0x3F);
		}

		if (ch < 0xF0) {
			// Three-byte sequence.
			const int c2 = static_cast<unsigned char>(s[++p]);
			if ((c2 & 0xC0) != 0x80) return -1;
			const int c3 = static_cast<unsigned char>(s[++p]);
			if ((c3 & 0xC0) != 0x80) return -1;

			p++;
			return ((ch & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
		}

		if (ch < 0xF8) {
			// Four-byte sequence.
			const int c2 = static_cast<unsigned char>(s[++p]);
			if ((c2 & 0xC0) != 0x80) return -1;
			const int c3 = static_cast<unsigned char>(s[++p]);
			if ((c3 & 0xC0) != 0x80) return -1;
			const int c4 = static_cast<unsigned char>(s[++p]);
			if ((c4 & 0xC0) != 0x80) return -1;

			ch = ((ch & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
			// Values from 0x110000 upwards are reported as errors, but the sequence is still consumed.
			if (ch >= 0x110000) ch = -1;
			p++;
			return ch;
		}

		// 0xF8..0xFF can never start a sequence.
		p++;
		return -1;
	}

	// Steps back to the start of the previous character of a UTF-8 string and decodes it (experimental).
	// s: the string
	// p [in,out]: the position; on return it is the offset of the first byte of the character read
	// return value: the character read, in UTF-32 format; 0 when p is already at (or before)
	// the start of the string; otherwise whatever utf8ReadForward() returns at the new position
	int utf8ReadBackward(const char* s, int& p) {
		if (p <= 0) return 0;

		// Move back one byte, then keep going while we are on a continuation byte.
		--p;
		while (p > 0 && (static_cast<unsigned char>(s[p]) & 0xC0) == 0x80) {
			--p;
		}

		// Decode with a scratch offset so that p stays on the start of the character.
		int cursor = p;
		return utf8ReadForward(s, cursor);
	}

	// Advances a pointer into a UTF-8 string to the first byte of the next character.
	// A pointer that already sits on the terminating NUL is returned unchanged.
	const char* utf8GoToNextCharacter(const char* s) {
		if (*s != 0) {
			// Step over the current byte and any continuation bytes that follow it.
			for (++s; (static_cast<unsigned char>(*s) & 0xC0) == 0x80; ++s) {
			}
		}
		return s;
	}

	// Moves a pointer into a UTF-8 string back to the first byte of the previous character.
	// There is no bounds check: the caller must make sure a previous character exists.
	const char* utf8GoToPrevCharacter(const char* s) {
		--s;
		// Keep walking back while we are on a continuation byte.
		while ((static_cast<unsigned char>(*s) & 0xC0) == 0x80) {
			--s;
		}
		return s;
	}

	// Returns true if the UTF-32 character is treated as white space.
	// The list is taken from the output of glib-2.60.0.
	bool utf32IsSpace(int ch) {
		// The contiguous block U+2000..U+200A.
		if (ch >= 0x2000 && ch <= 0x200A) return true;

		// The remaining, isolated code points.
		static const int singles[] = {
			0x9, 0xA, 0xC, 0xD, 0x20, 0xA0, 0x1680,
			0x2028, 0x2029, 0x202F, 0x205F, 0x3000,
		};
		for (int candidate : singles) {
			if (candidate == ch) return true;
		}
		return false;
	}

	// Returns true if the UTF-32 character is alphabetic.
	// The tables are taken from the output of glib-2.60.0 and cover only a subset of
	// Unicode: 0x0530 - 0x1CFF and 0x2000 - 0x10FFFF are skipped and always yield false.
	bool utf32IsAlpha(int ch) {
		// Each table is a sorted list of {first code point, run length} pairs.
		static const int runsFrom65[] = {
			65, 26,
			97, 26,
			170, 1,
			181, 1,
			186, 1,
			192, 23,
			216, 31,
		};
		static const int runsFrom248[] = {
			248, 458,
			710, 12,
			736, 5,
			748, 1,
			750, 1,
		};
		static const int runsFrom880[] = {
			880, 5,
			886, 2,
			890, 4,
			895, 1,
			902, 1,
			904, 3,
			908, 1,
			910, 20,
			931, 83,
			1015, 139,
			1162, 166,
		};

		// skip 0x0530 - 0x1CFF

		static const int runsFrom7424[] = {
			7424, 192,
			7680, 278,
			7960, 6,
			7968, 38,
			8008, 6,
			8016, 8,
			8025, 1,
			8027, 1,
			8029, 1,
			8031, 31,
			8064, 53,
			8118, 7,
			8126, 1,
			8130, 3,
			8134, 7,
			8144, 4,
			8150, 6,
			8160, 13,
			8178, 3,
			8182, 7,
		};

		// skip 0x2000 - 0x10FFFF

		// A window [first, limit) selects the table that has to be searched.
		struct Window {
			int first;
			int limit;
			const int* runs;
			int length;
		};
		static const Window windows[] = {
			{ 65, 247, runsFrom65, static_cast<int>(sizeof(runsFrom65) / sizeof(runsFrom65[0])) },
			{ 248, 751, runsFrom248, static_cast<int>(sizeof(runsFrom248) / sizeof(runsFrom248[0])) },
			{ 880, 1328, runsFrom880, static_cast<int>(sizeof(runsFrom880) / sizeof(runsFrom880[0])) },
			{ 7424, 8189, runsFrom7424, static_cast<int>(sizeof(runsFrom7424) / sizeof(runsFrom7424[0])) },
		};

		for (const Window& w : windows) {
			// Windows are sorted, so a character below this one lies in a gap.
			if (ch < w.first) return false;
			if (ch >= w.limit) continue;

			for (int k = 0; k < w.length; k += 2) {
				const int runStart = w.runs[k];
				// Runs are sorted too; nothing later can match.
				if (ch < runStart) return false;
				if (ch < runStart + w.runs[k + 1]) return true;
			}
			return false;
		}

		return false;
	}

	// Returns true if the UTF-32 character lies in one of the CJK code point ranges below.
	bool utf32IsCJK(int ch) {
		return (ch >= 0x002E80 && ch <= 0x009FFF) /* CJK scripts and symbols */
			|| (ch >= 0x00F900 && ch <= 0x00FAFF) /* CJK Compatibility Ideographs */
			|| (ch >= 0x00FE30 && ch <= 0x00FE4F) /* CJK Compatibility Forms */
			|| (ch >= 0x020000 && ch <= 0x03FFFF) /* Supplementary Ideographic Plane & Tertiary Ideographic Plane */
			;
	}
	+
	// Returns true if the UTF-32 character is an ending (closing) punctuation mark,
	// i.e. one that should not be placed at the start of a line in CJK mode.
	// The list is taken from Microsoft Word.
	bool utf32IsCJKEndingPunctuation(int ch) {
		switch (ch) {
		case 0x21: case 0x25: case 0x29: case 0x2C: case 0x2E: case 0x3A: case 0x3B: case 0x3E: case 0x3F: case 0x5D: case 0x7D:
		case 0xA2: case 0xA8: case 0xB0: case 0xB7:
		case 0x2C7: case 0x2C9:
		case 0x2015: case 0x2016: case 0x2019: case 0x201D: case 0x2026: case 0x2030: case 0x2032: case 0x2033: case 0x203A: case 0x2103: case 0x2236:
		case 0x3001: case 0x3002: case 0x3003: case 0x3009: case 0x300B: case 0x300D: case 0x300F: case 0x3011: case 0x3015: case 0x3017: case 0x301E:
		case 0x0FE36: case 0x0FE3A: case 0x0FE3E: case 0x0FE40: case 0x0FE44: case 0x0FE5A: case 0x0FE5C: case 0x0FE5E:
		case 0x0FF01: case 0x0FF02: case 0x0FF05: case 0x0FF07: case 0x0FF09: case 0x0FF0C: case 0x0FF0E: case 0x0FF1A: case 0x0FF1B: case 0x0FF1F:
		case 0x0FF3D: case 0x0FF40: case 0x0FF5C: case 0x0FF5D: case 0x0FF5E: case 0x0FFE0:
			return true;
		default:
			return false;
		}
	}
	+
	// Returns true if the UTF-32 character is a starting (opening) punctuation mark,
	// i.e. one that should not be placed at the end of a line in CJK mode.
	// The list is taken from Microsoft Word.
	bool utf32IsCJKStartingPunctuation(int ch) {
		switch (ch) {
		case 0x24: case 0x28: case 0x5B: case 0x7B:
		case 0xA3: case 0xA5: case 0xB7:
		case 0x2018: case 0x201C:
		case 0x3008: case 0x300A: case 0x300C: case 0x300E: case 0x3010: case 0x3014: case 0x3016: case 0x301D:
		case 0x0FE59: case 0x0FE5B: case 0x0FE5D:
		case 0x0FF04: case 0x0FF08: case 0x0FF0E:
		case 0x0FF3B: case 0x0FF5B: case 0x0FFE1: case 0x0FFE5:
			return true;
		default:
			return false;
		}
	}
	+
	// Converts a UTF-32 character to lower case.
	// ch: the character
	// return value: the lower-case mapping, or ch itself when no table entry applies.
	// The tables are taken from the output of glib-2.60.0.
	int utf32ToLower(int ch) {
		// ranges_A_B: sorted {first code point, run length, delta} triples that are
		// searched when A <= ch < B; a hit returns ch + delta.

		static const int ranges_65_223[] = {
			65, 26, 32,
			192, 23, 32,
			216, 7, 32,
		};
		static const int ranges_304_504[] = {
			304, 1, -199,
			376, 1, -121,
			385, 1, 210,
			390, 1, 206,
			393, 2, 205,
			398, 1, 79,
			399, 1, 202,
			400, 1, 203,
			403, 1, 205,
			404, 1, 207,
			406, 1, 211,
			407, 1, 209,
			412, 1, 211,
			413, 1, 213,
			415, 1, 214,
			422, 1, 218,
			425, 1, 218,
			430, 1, 218,
			433, 2, 217,
			439, 1, 219,
			452, 1, 2,
			455, 1, 2,
			458, 1, 2,
			497, 1, 2,
			502, 1, -97,
			503, 1, -56,
		};
		static const int ranges_544_582[] = {
			544, 1, -130,
			570, 1, 10795,
			573, 1, -163,
			574, 1, 10792,
			579, 1, -195,
			580, 1, 69,
			581, 1, 71,
		};
		static const int ranges_895_1018[] = {
			895, 1, 116,
			902, 1, 38,
			904, 3, 37,
			908, 1, 64,
			910, 2, 63,
			913, 17, 32,
			931, 9, 32,
			975, 1, 8,
			1012, 1, -60,
			1017, 1, -7,
		};
		static const int ranges_1021_1367[] = {
			1021, 3, -130,
			1024, 16, 80,
			1040, 32, 32,
			1216, 1, 15,
			1329, 38, 48,
		};
		static const int ranges_4256_5110[] = {
			4256, 38, 7264,
			4295, 1, 7264,
			4301, 1, 7264,
			5024, 80, 38864,
			5104, 6, 8,
		};
		static const int ranges_7312_8499[] = {
			7312, 43, -3008,
			7357, 3, -3008,
			7838, 1, -7615,
			7944, 8, -8,
			7960, 6, -8,
			7976, 8, -8,
			7992, 8, -8,
			8008, 6, -8,
			8025, 1, -8,
			8027, 1, -8,
			8029, 1, -8,
			8031, 1, -8,
			8040, 8, -8,
			8072, 8, -8,
			8088, 8, -8,
			8104, 8, -8,
			8120, 2, -8,
			8122, 2, -74,
			8124, 1, -9,
			8136, 4, -86,
			8140, 1, -9,
			8152, 2, -8,
			8154, 2, -100,
			8168, 2, -8,
			8170, 2, -112,
			8172, 1, -7,
			8184, 2, -128,
			8186, 2, -126,
			8188, 1, -9,
			8486, 1, -7517,
			8490, 1, -8383,
			8491, 1, -8262,
			8498, 1, 28,
		};
		static const int ranges_11264_11392[] = {
			11264, 47, 48,
			11362, 1, -10743,
			11363, 1, -3814,
			11364, 1, -10727,
			11373, 1, -10780,
			11374, 1, -10749,
			11375, 1, -10783,
			11376, 1, -10782,
			11390, 2, -10815,
		};
		static const int ranges_42877_42932[] = {
			42877, 1, -35332,
			42893, 1, -42280,
			42922, 1, -42308,
			42923, 1, -42319,
			42924, 1, -42315,
			42925, 1, -42305,
			42926, 1, -42308,
			42928, 1, -42258,
			42929, 1, -42282,
			42930, 1, -42261,
			42931, 1, 928,
		};
		static const int ranges_65313_125218[] = {
			65313, 26, 32,
			66560, 40, 40,
			66736, 36, 40,
			68736, 51, 64,
			71840, 32, 32,
			93760, 32, 32,
			125184, 34, 34,
		};

		// ranges2_A_B: sorted {first, last} pairs (both inclusive) that are searched
		// when A <= ch <= B. Inside a pair, a character at an even offset from
		// 'first' maps to ch + 1; one at an odd offset is left unchanged.

		static const int ranges2_256_440[] = {
			256, 302,
			306, 310,
			313, 327,
			330, 374,
			377, 381,
			386, 388,
			391, 391,
			395, 395,
			401, 401,
			408, 408,
			416, 420,
			423, 423,
			428, 428,
			431, 431,
			435, 437,
			440, 440,
		};
		static const int ranges2_444_590[] = {
			444, 444,
			453, 453,
			456, 456,
			459, 475,
			478, 494,
			498, 500,
			504, 542,
			546, 562,
			571, 571,
			577, 577,
			582, 590,
		};
		static const int ranges2_880_1326[] = {
			880, 882,
			886, 886,
			984, 1006,
			1015, 1015,
			1018, 1018,
			1120, 1152,
			1162, 1214,
			1217, 1229,
			1232, 1326,
		};
		static const int ranges2_7680_11506[] = {
			7680, 7828,
			7840, 7934,
			8579, 8579,
			11360, 11360,
			11367, 11371,
			11378, 11378,
			11381, 11381,
			11392, 11490,
			11499, 11501,
			11506, 11506,
		};
		static const int ranges2_42560_42936[] = {
			42560, 42604,
			42624, 42650,
			42786, 42798,
			42802, 42862,
			42873, 42875,
			42878, 42886,
			42891, 42891,
			42896, 42898,
			42902, 42920,
			42932, 42936,
		};

		// Both must be pointers: they are re-targeted at one of the tables above
		// and then indexed.
		const int *ranges = nullptr, *ranges2 = nullptr;
		int rangeSize = 0, range2Size = 0;

	#define RANGE(LPS,LPE) ranges_##LPS##_##LPE
	#define CHECK_RANGE(LPS,LPE) \
		else if (ch < LPS) { \
		} else if (ch < LPE) { \
			ranges = RANGE(LPS,LPE); rangeSize = sizeof(RANGE(LPS,LPE)) / sizeof(RANGE(LPS,LPE)[0]); \
		}

	#define RANGE2(LPS,LPE) ranges2_##LPS##_##LPE
	#define CHECK_RANGE2(LPS,LPE) \
		else if (ch < LPS) { \
		} else if (ch <= LPE) { \
			ranges2 = RANGE2(LPS,LPE); range2Size = sizeof(RANGE2(LPS,LPE)) / sizeof(RANGE2(LPS,LPE)[0]); \
		}

		// Pick the triple table (if any) whose window contains ch.
		if (false) {}
		CHECK_RANGE(65, 223)
		CHECK_RANGE(304, 504)
		CHECK_RANGE(544, 582)
		CHECK_RANGE(895, 1018)
		CHECK_RANGE(1021, 1367)
		CHECK_RANGE(4256, 5110)
		CHECK_RANGE(7312, 8499)
		CHECK_RANGE(11264, 11392)
		CHECK_RANGE(42877, 42932)
		CHECK_RANGE(65313, 125218)

		for (int i = 0; i < rangeSize; i += 3) {
			const int lps = ranges[i];
			const int lpe = lps + ranges[i + 1];
			if (ch < lps) break;
			else if (ch < lpe) {
				return ch + ranges[i + 2];
			}
		}

		// No fixed-offset mapping found; try the alternating upper/lower tables.
		if (false) {}
		CHECK_RANGE2(256, 440)
		CHECK_RANGE2(444, 590)
		CHECK_RANGE2(880, 1326)
		CHECK_RANGE2(7680, 11506)
		CHECK_RANGE2(42560, 42936)

		for (int i = 0; i < range2Size; i += 2) {
			const int lps = ranges2[i];
			const int lpe = ranges2[i + 1];
			if (ch < lps) break;
			else if (ch <= lpe) {
				if (((ch - lps) & 0x1) == 0) return ch + 1;
				else break;
			}
		}

	#undef RANGE
	#undef RANGE2
	#undef CHECK_RANGE
	#undef CHECK_RANGE2

		return ch;
	}
	diff --git a/src/UTF8Functions.h b/src/UTF8Functions.h
	index 19f7a25..c34345e 100644
	--- a/src/UTF8Functions.h
	+++ b/src/UTF8Functions.h
	@@ -1,159 +1,162 @@
	/*
	* Copyright (C) 2019 Me and My Shadow
	*
	* This file is part of Me and My Shadow.
	*
	* Me and My Shadow is free software: you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation, either version 3 of the License, or
	* (at your option) any later version.
	*
	* Me and My Shadow is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with Me and My Shadow. If not, see <http://www.gnu.org/licenses/>.
	*/

	#ifndef UTF8FUNCTIONS_H
	#define UTF8FUNCTIONS_H

	// A helper function to read a character from utf8 string and advance the pointer
	// s: the string
	// p [in,out]: the position
	// return value: the character read, in UTF-32 format; 0 means end of string, -1 means error
	int utf8ReadForward(const char* s, int& p);

	// A helper function to read a character backward from utf8 string and advance the pointer (experimental)
	// s: the string
	// p [in,out]: the position
	// return value: the character read, in UTF-32 format; 0 means end of string, -1 means error
	int utf8ReadBackward(const char* s, int& p);

	// Decodes only the first character of a UTF-8 string.
	// s: the string
	// return value: the character read, in UTF-32 format; 0 means end of string, -1 means error
	// NOTE: Prefer utf8ReadForward() when reading several characters in a row,
	// because it hands back the advanced position.
	inline int utf8GetCharacter(const char* s) {
		// Scratch offset; the advanced position is not needed here.
		int offset = 0;
		return utf8ReadForward(s, offset);
	}

	// A helper function to advance the pointer in a utf8 string to next character
	// s: the pointer
	// return value: the new pointer
	// WARNING: there is no sanity check!
	const char* utf8GoToNextCharacter(const char* s);

	// A helper function to advance the pointer in a utf8 string to previous character
	// s: the pointer
	// return value: the new pointer
	// WARNING: there is no sanity check!
	const char* utf8GoToPrevCharacter(const char* s);

	bool utf32IsSpace(int ch); // check if the character is white space
	bool utf32IsAlpha(int ch); // check if the character is alphabetic (only a subset of Unicode is covered)
	+bool utf32IsCJK(int ch); // check if the character lies in one of the CJK code point ranges
	+bool utf32IsCJKEndingPunctuation(int ch); // check if the character shouldn't be at the start of a line in CJK mode
	+bool utf32IsCJKStartingPunctuation(int ch); // check if the character shouldn't be at the end of a line in CJK mode
	int utf32ToLower(int ch); // convert the character to lower case; returns ch itself if there is no mapping

	// Opens a loop over the characters of a UTF-8 byte sequence; close it with
	// U8STRING_FOR_EACH_CHARACTER_DO_END().
	// STR: anything indexable with [] that yields bytes
	// I: name of the size_t byte index declared by the loop
	// M: number of bytes in STR
	// CH: name of the int declared in each iteration; holds the decoded character
	// in UTF-32 format
	// INVALID_CH: value stored in CH when the bytes at I are not a valid sequence
	// (stray continuation byte, truncated or malformed sequence, lead byte >= 0xF8,
	// or a value >= 0x110000). In that case only the byte at I is consumed.
	#define U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(STR,I,M,CH,INVALID_CH) \
	for(size_t I=0;I<(M);I++){ \
		int CH=(unsigned char)(STR)[I]; \
		if(CH<0x80){ \
		}else if(CH<0xC0){ \
			CH=(INVALID_CH); \
		}else if(CH<0xE0){ \
			if(I+1>=(M)) CH=(INVALID_CH); \
			else{ \
				int c2=(unsigned char)(STR)[I+1]; \
				if((c2&0xC0)!=0x80) CH=(INVALID_CH); \
				else{ \
					CH=((CH & 0x1F)<<6) | (c2 & 0x3F); \
					I++; \
				} \
			} \
		}else if(CH<0xF0){ \
			if(I+2>=(M)) CH=(INVALID_CH); \
			else{ \
				int c2=(unsigned char)(STR)[I+1]; \
				int c3=(unsigned char)(STR)[I+2]; \
				if((c2&0xC0)!=0x80 || (c3&0xC0)!=0x80) CH=(INVALID_CH); \
				else{ \
					CH=((CH & 0xF)<<12) | ((c2 & 0x3F)<<6) | (c3 & 0x3F); \
					I+=2; \
				} \
			} \
		}else if(CH<0xF8){ \
			if(I+3>=(M)) CH=(INVALID_CH); \
			else{ \
				int c2=(unsigned char)(STR)[I+1]; \
				int c3=(unsigned char)(STR)[I+2]; \
				int c4=(unsigned char)(STR)[I+3]; \
				if((c2&0xC0)!=0x80 || (c3&0xC0)!=0x80 || (c4&0xC0)!=0x80) CH=(INVALID_CH); \
				else{ \
					CH=((CH & 0x7)<<18) | ((c2 & 0x3F)<<12) | ((c3 & 0x3F)<<6) | (c4 & 0x3F); \
					if(CH>=0x110000) CH=(INVALID_CH); \
					else I+=3; \
				} \
			} \
		}else{ \
			CH=(INVALID_CH); \
		}

	// Closes the loop opened by U8STRING_FOR_EACH_CHARACTER_DO_BEGIN().
	#define U8STRING_FOR_EACH_CHARACTER_DO_END() }

	// Encodes one UTF-32 character as UTF-8.
	// CH: the character to encode (evaluated several times)
	// OPERATION: something callable as OPERATION(byte); it is invoked once for each
	// output byte, in order (1 to 4 bytes)
	// Anything that is not below 0x10000 takes the four-byte form; the value is not
	// range-checked.
	#define U8_ENCODE(CH,OPERATION) \
	if((CH)<0x80){ \
		OPERATION(CH); \
	}else if((CH)<0x800){ \
		OPERATION(0xC0 | ((CH)>>6)); \
		OPERATION(0x80 | ((CH) & 0x3F)); \
	}else if((CH)<0x10000){ \
		OPERATION(0xE0 | ((CH)>>12)); \
		OPERATION(0x80 | (((CH)>>6) & 0x3F)); \
		OPERATION(0x80 | ((CH) & 0x3F)); \
	}else{ \
		OPERATION(0xF0 | ((CH)>>18)); \
		OPERATION(0x80 | (((CH)>>12) & 0x3F)); \
		OPERATION(0x80 | (((CH)>>6) & 0x3F)); \
		OPERATION(0x80 | ((CH) & 0x3F)); \
	}

	// Opens a loop over the characters of a UTF-16 sequence; close it with
	// U16STRING_FOR_EACH_CHARACTER_DO_END().
	// STR: anything indexable with [] that yields 16-bit code units
	// I: name of the size_t index declared by the loop
	// M: number of code units in STR
	// CH: name of the int declared in each iteration; holds the decoded character
	// in UTF-32 format
	// INVALID_CH: value stored in CH for an unpaired lead or trail surrogate.
	// An unpaired lead surrogate consumes only itself, so the code unit after it is
	// decoded again on the next iteration.
	#define U16STRING_FOR_EACH_CHARACTER_DO_BEGIN(STR,I,M,CH,INVALID_CH) \
	for(size_t I=0;I<(M);I++){ \
		int CH=(unsigned short)((STR)[I]); \
		if(CH<0xD800){ \
		}else if(CH<0xDC00){ \
			/* lead surrogate */ \
			I++; \
			if(I>=(M)) CH=(INVALID_CH); \
			else{ \
				int c2=(unsigned short)((STR)[I]); \
				/* the check is on the second unit: CH is known to be a lead surrogate here */ \
				if(c2>=0xDC00 && c2<0xE000){ \
					/* trail surrogate */ \
					CH=0x10000 + (((CH & 0x3FF)<<10) | (c2 & 0x3FF)); \
				}else{ \
					/* invalid */ \
					CH=(INVALID_CH); \
					I--; \
				} \
			} \
		}else if(CH<0xE000){ \
			/* invalid trail surrogate */ \
			CH=(INVALID_CH); \
		}

	// Closes the loop opened by U16STRING_FOR_EACH_CHARACTER_DO_BEGIN().
	#define U16STRING_FOR_EACH_CHARACTER_DO_END() }

	// Encodes one UTF-32 character as UTF-16.
	// CH: the character to encode (evaluated several times)
	// OPERATION: something callable as OPERATION(unit); it is invoked once for a
	// character below 0x10000 and twice (lead surrogate, then trail surrogate) otherwise
	#define U16_ENCODE(CH,OPERATION) \
	if((CH)<0x10000){ \
		OPERATION(CH); \
	}else{ \
		OPERATION(0xD800 | (((CH)-0x10000)>>10)); \
		OPERATION(0xDC00 | ((CH) & 0x3FF)); \
	}

	const int REPLACEMENT_CHARACTER = 0x00FFFD;

	#endif
	diff --git a/src/WordWrapper.cpp b/src/WordWrapper.cpp
	index 3575f68..dea9605 100644
	--- a/src/WordWrapper.cpp
	+++ b/src/WordWrapper.cpp
	@@ -1,284 +1,305 @@
	/*
	* Copyright (C) 2019 Me and My Shadow
	*
	* This file is part of Me and My Shadow.
	*
	* Me and My Shadow is free software: you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation, either version 3 of the License, or
	* (at your option) any later version.
	*
	* Me and My Shadow is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with Me and My Shadow. If not, see <http://www.gnu.org/licenses/>.
	*/

	#include "WordWrapper.h"
	#include "HyphenationManager.h"
	#include "HyphenationRule.h"
	#include "UTF8Functions.h"

	#include <assert.h>

	#include <SDL_ttf_fontfallback.h>

	int WordWrapper::getTextWidth(const std::string& s) {
	if (s.empty()) return 0;

	int w = 0;

	if (font) {
	TTF_SizeUTF8(font, s.c_str(), &w, NULL);
	} else {
	const size_t m = s.size();

	U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(s, i, m, ch, REPLACEMENT_CHARACTER);
	w++;
	U8STRING_FOR_EACH_CHARACTER_DO_END();
	}

	return w;
	}

	// Returns the width of a single character: 1 when no font is set, otherwise the
	// value TTF_GlyphMetrics() writes to its last output argument (0 if it writes nothing).
	int WordWrapper::getGlyphWidth(int ch) {
		if (!font) return 1;

		// NOTE(review): the last output of TTF_GlyphMetrics is presumably the glyph advance - confirm against the SDL_ttf fork in use.
		int advance = 0;
		TTF_GlyphMetrics(font, ch, NULL, NULL, NULL, NULL, &advance);
		return advance;
	}

	// Creates a wrapper with no font, a maximal width of 0, and both word wrapping
	// and hyperlink reservation switched off.
	WordWrapper::WordWrapper()
		: font(NULL), maxWidth(0), wordWrap(false), reserveHyperlinks(false)
	{}

	// Nothing to release explicitly here.
	WordWrapper::~WordWrapper() {}

	// Checks whether a word is reserved: it returns true if
	// - reserveHyperlinks is set and the word contains "http://" or "https://"
	//   anywhere (the scheme letters are matched case-insensitively), or
	// - the word is equal to one of the entries of reservedWords.
	// addWord() skips hyphenation for words this function accepts.
	bool WordWrapper::isReserved(const std::string& word) {
		if (reserveHyperlinks) {
			const char *s = word.c_str();
			const size_t m = word.size();
			for (size_t i = 0; i < m; i++) {
				// we only support http or https
				// The comparisons short-circuit, and the terminating NUL of c_str()
				// fails every one of them, so s[i + k] is never read past the NUL.
				if ((s[i] == 'H' || s[i] == 'h')
					&& (s[i + 1] == 'T' || s[i + 1] == 't')
					&& (s[i + 2] == 'T' || s[i + 2] == 't')
					&& (s[i + 3] == 'P' || s[i + 3] == 'p'))
				{
					if (s[i + 4] == ':' && s[i + 5] == '/' && s[i + 6] == '/') {
						// http
						return true;
					} else if ((s[i + 4] == 'S' || s[i + 4] == 's') && s[i + 5] == ':' && s[i + 6] == '/' && s[i + 7] == '/') {
						// https
						return true;
					}
				}
			}
		}

		for (const std::string& s : reservedWords) {
			if (word == s) return true;
		}

		return false;
	}

	// Splits the input at '\n' and passes each piece to addLine(). '\r' characters
	// are dropped. The text after the last '\n' (possibly empty) is passed as well.
	void WordWrapper::addString(std::vector<std::string>& output, const std::string& input) {
		std::string current;

		for (size_t k = 0; k < input.size(); ++k) {
			const char c = input[k];
			switch (c) {
			case '\r':
				// Ignored.
				break;
			case '\n':
				addLine(output, current);
				current.clear();
				break;
			default:
				current.push_back(c);
				break;
			}
		}

		addLine(output, current);
	}

	// Adds a word to the current line. The line is written to output only if
	// line + word does not fit into maxWidth; in that case the word (or, with
	// hyphenation, the rest of the word) becomes the start of the next line.
	// output: receives the finished lines
	// line [in,out]: the line being built
	// lineWidth [in,out]: the width of 'line'
	// spaces: the white space in front of the word; dropped when the word starts a new line
	// nonSpaces: the word itself
	void WordWrapper::addWord(std::vector<std::string>& output, std::string& line, int& lineWidth, const std::string& spaces, const std::string& nonSpaces) {
		int w1 = getTextWidth(spaces);

		{
			int w2 = getTextWidth(nonSpaces);

			//Check if it fits into current line.
			if (lineWidth + w1 + w2 <= maxWidth) {
				line += spaces + nonSpaces;
				lineWidth += w1 + w2;
				return;
			}

			//Now it doesn't fit into current line.

			//Check if we should skip the hyphenation.
			if (hyphen.empty() || isReserved(nonSpaces)) {
				if (line.empty()) {
					//A line consists of at least one word, so we append it forcefully.
					line += spaces + nonSpaces;
					lineWidth += w1 + w2;
				} else {
					//We output current line.
					output.push_back(line);

					//And add a new line consisting of new word (but we remove spaces in it).
					line = nonSpaces;
					lineWidth = w2;
				}
				return;
			}
		}

		auto hm = getHyphenationManager();
		auto hyphenator = hyphenatorLanguage.empty() ? hm->getHyphenator() : hm->getHyphenator(hyphenatorLanguage);
		//NOTE(review): 'rules' is indexed by byte position of nonSpaces below and
		//presumably holds a null entry where no hyphenation is possible - confirm
		//against HyphenationRule.h.
		auto rules = hyphenator->applyHyphenationRules(nonSpaces);

		const size_t m = nonSpaces.size();

		//tmp: the bytes collected since the last break.
		//prev*: the last hyphenation candidate that still fit into the line.
		//skip: number of upcoming bytes which should not be copied to tmp.
		std::string tmp, prev;
		int skip = 0, prevSkip = 0, prevWidth = 0;
		size_t prevIndex = 0;

		for (size_t i = 0;; i++) {
			//Pointer to const, not a const pointer: it is re-assigned when we rewind.
			const Hyphenate::HyphenationRule* rule = (i < m) ? (*rules)[i] : NULL;
			if (rule || i == m) {
				std::string tmp2 = tmp;
				if (rule) rule->apply_first(tmp2, hyphen);

				int newWidth = getTextWidth(tmp2);

				//Check if we should output current line directly.
				if (lineWidth + w1 + newWidth > maxWidth && prev.empty() && !line.empty()) {
					//We output current line.
					output.push_back(line);

					line.clear();
					lineWidth = 0;
					w1 = 0;
				}

				//Check if the line is still too long.
				if (lineWidth + w1 + newWidth > maxWidth) {
					//Check if we have previous available hyphenation
					if (prev.empty()) {
						//Line is empty, we have to append it forcefully.
						assert(line.empty());

						if (w1 > 0) line += spaces;
						line += tmp2;
						if (i < m) {
							output.push_back(line);
							line.clear();
							lineWidth = 0;
							w1 = 0;
						} else {
							lineWidth += w1 + newWidth;
						}

						//Update buffer
						tmp.clear();
						if (rule) skip += rule->apply_second(tmp);
					} else {
						//We use previous available hyphenation
						if (w1 > 0) line += spaces;
						output.push_back(line + prev);
						line.clear();
						lineWidth = 0;
						w1 = 0;

						//Rewind
						prev.clear();
						prevWidth = 0;
						skip = prevSkip;
						i = prevIndex;

						//Update buffer
						tmp.clear();
						rule = (*rules)[i];
						assert(rule != NULL);
						skip += rule->apply_second(tmp);
					}
				} else if (i == m) {
					//Output last part
					if (w1 > 0) line += spaces;
					line += tmp2;
					lineWidth += w1 + newWidth;
				} else if (newWidth > prevWidth) {
					//Update prev hyphenation
					prev = tmp2;
					prevSkip = skip;
					prevWidth = newWidth;
					prevIndex = i;
				}
			}

			if (i >= m) break;

			if (skip > 0) skip--;
			else tmp.push_back(nonSpaces[i]);
		}
	}

	void WordWrapper::addLine(std::vector<std::string>& output, const std::string& input) {
	if (!wordWrap) {
	//Word wrap is not enabled, simply add it to output
	output.push_back(input);
	return;
	}

	const size_t m = input.size();

	std::string spaces, nonSpaces, line;
	int lineWidth = 0;

	+ bool prevIsCJK = false, prevIsCJKStarting = false;
	+
	U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(input, i, m, ch, REPLACEMENT_CHARACTER);

	//A word consists of a sequence of white spaces and a sequence of non-white-spaces.

	//TODO: For CJK should only read one CJK character (possibly with a punctuation mark)

	if (utf32IsSpace(ch)) {
	+ prevIsCJK = false;
	+ prevIsCJKStarting = false;
	if (!nonSpaces.empty()) {
	addWord(output, line, lineWidth, spaces, nonSpaces);
	spaces.clear();
	nonSpaces.clear();
	}
	U8_ENCODE(ch, spaces.push_back);
	} else {
	+ if (prevIsCJK) {
	+ //Output the CJK character immediately unless current character can't be at start of line
	+ if (!utf32IsCJKEndingPunctuation(ch)) {
	+ addWord(output, line, lineWidth, spaces, nonSpaces);
	+ spaces.clear();
	+ nonSpaces.clear();
	+ }
	+ } else if (!nonSpaces.empty()) {
	+ //Output the existing non-CJK character immediately unless it can't be at end of line
	+ if (!prevIsCJKStarting) {
	+ addWord(output, line, lineWidth, spaces, nonSpaces);
	+ spaces.clear();
	+ nonSpaces.clear();
	+ }
	+ }
	+ prevIsCJK = utf32IsCJK(ch);
	+ prevIsCJKStarting = utf32IsCJKStartingPunctuation(ch);
	U8_ENCODE(ch, nonSpaces.push_back);
	}

	U8STRING_FOR_EACH_CHARACTER_DO_END();

	//FIXME: Here we temporarily ignore trailing spaces
	if (!nonSpaces.empty()) {
	addWord(output, line, lineWidth, spaces, nonSpaces);
	}

	//Output the remaining text.
	output.push_back(line);
	}

	// Passes every entry of input to addLine(), in order.
	void WordWrapper::addLines(std::vector<std::string>& output, const std::vector<std::string>& input) {
		const size_t count = input.size();
		for (size_t k = 0; k < count; ++k) {
			addLine(output, input[k]);
		}
	}

File Metadata

Mime Type: text/x-diff
Expires: Sat, May 9, 7:57 PM (6 d, 22 h ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 62863
Default Alt Text: (24 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions