Page Menu
Home
Phabricator (Chris)
Search
Configure Global Search
Log In
Files
F116751
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
32 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/HyphenationRule.cpp b/src/HyphenationRule.cpp
index e203f42..d1aadef 100644
--- a/src/HyphenationRule.cpp
+++ b/src/HyphenationRule.cpp
@@ -1,108 +1,108 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
/* This source file provides code for the HyphenationRule class which
* is documented in HyphenationRule.h */
#include "HyphenationRule.h"
#include <string.h>
using namespace std;
/* Builds a rule from its textual pattern.
 *
 * The part before an optional '/' is a sequence of letters with decimal
 * digits strewn in: the letters are collected into key, and the number
 * standing in front of each letter (0 if there is none) becomes that
 * letter's priority. Trailing zero priorities are dropped.
 *
 * The part after a '/' describes a non-standard hyphenation in the form
 *     <insert_pre>=<insert_post>,<start>,<cut>
 * where the last two fields are optional. From start and cut the number
 * of bytes to delete in front of the hyphen (del_pre) and to skip behind
 * it (skip_post) are derived.
 *
 * \param dpattern The pattern text as described above. */
Hyphenate::HyphenationRule::HyphenationRule(std::string dpattern)
    : del_pre(0), skip_post(0)
{
    int priority = 0;
    unsigned int i;

    for (i = 0; i < dpattern.size() && dpattern[i] != '/'; i++) {
        if (dpattern[i] >= '0' && dpattern[i] <= '9') {
            priority = 10 * priority + dpattern[i] - '0';
        } else {
            key += dpattern[i];
            priorities.push_back(priority);
            priority = 0;
        }
    }

    /* Complete and simplify the array. The emptiness check matters: a
     * pattern without any non-zero priority would otherwise pop every
     * element and then call back() on an empty vector. */
    priorities.push_back(priority);
    while (!priorities.empty() && priorities.back() == 0)
        priorities.pop_back();

    /* Now check for nonstandard hyphenation. First, parse it. */
    if (i < dpattern.size() && dpattern[i] == '/') {
        i += 1; /* Ignore the /. */
        int field = 1;
        unsigned int start = 0, cut = 0;

        for (; i < dpattern.size(); i++) {
            if (field == 1 && dpattern[i] == '=')
                field++;
            else if (field >= 2 && field <= 3 && dpattern[i] == ',')
                field++;
            else if (field == 4 && (dpattern[i] < '0' || dpattern[i] > '9'))
                break;
            else if (field == 1)
                insert_pre += dpattern[i];
            else if (field == 2)
                insert_post += dpattern[i];
            else if (field == 3)
                start = start * 10 + dpattern[i] - '0';
            else if (field == 4)
                cut = cut * 10 + dpattern[i] - '0';
        }

        if (field < 4) /* There was no fourth field */
            cut = key.size() - start;
        if (field < 3)
            start = 1;

        /* start is used as a 1-based position below (priorities[j - 1]).
         * An explicit start of 0 in the pattern would make j - 1 wrap
         * around and read far outside the vector, so treat it as 1. */
        if (start == 0)
            start = 1;

        skip_post = cut;
        /* Every position of the cut region that lies in front of the first
         * odd priority (the hyphenation point) moves from "skip after the
         * hyphen" to "delete before the hyphen". */
        for (unsigned int j = start; j < start + cut && j < priorities.size(); j++) {
            if (priorities[j - 1] % 2 == 1) break;
            del_pre++; skip_post--;
        }
    }
}
/* Applies the whole rule to word: first the part up to and including the
 * hyphen, then the part after it. Returns whatever apply_second() reports,
 * i.e. the number of following bytes the caller should skip. */
int Hyphenate::HyphenationRule::apply(string& word, const string &hyph) const
{
    this->apply_first(word, hyph);
    const int bytes_to_skip = this->apply_second(word);
    return bytes_to_skip;
}
/* Applies the part of the rule in front of the hyphen: removes the last
 * del_pre bytes of word (if any), then appends insert_pre followed by the
 * hyphen string itself. */
void Hyphenate::HyphenationRule::apply_first(string& word, const string &hyph) const
{
    if (del_pre > 0) {
        /* Everything from this position to the end is dropped. */
        const string::size_type keep = word.size() - del_pre;
        word.erase(keep);
    }
    word.append(insert_pre);
    word.append(hyph);
}
/* Applies the part of the rule behind the hyphen: appends insert_post to
 * word and returns skip_post. */
int Hyphenate::HyphenationRule::apply_second(string& word) const
{
    word.append(insert_post);
    return skip_post;
}
/* Builds the string insert_pre + "=" + insert_post and copies it into a
 * malloc()-allocated, NUL-terminated buffer of s.size() + 1 bytes.
 * NOTE(review): the '+' side of this hunk hands out the raw pointer, so
 * presumably the caller has to release it with free() -- confirm against
 * the callers.
 * NOTE(review): the result of malloc() is not checked before strcpy(). */
-auto_ptr<char> Hyphenate::HyphenationRule::replacement_string() const {
+char* Hyphenate::HyphenationRule::replacement_string() const {
string s = (insert_pre + "=" + insert_post);
/* One extra byte for the terminating NUL that strcpy() writes. */
char *r = (char *)malloc((s.size() + 1) * sizeof(char));
strcpy(r, s.c_str());
- return auto_ptr<char>(r);
+ return r;
}
diff --git a/src/HyphenationRule.h b/src/HyphenationRule.h
index ec3ef96..fd060f7 100644
--- a/src/HyphenationRule.h
+++ b/src/HyphenationRule.h
@@ -1,120 +1,119 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATION_RULE_H
#define HYPHENATION_RULE_H
#include <string>
-#include <memory>
#include <vector>
namespace Hyphenate {
/** The HyphenationRule class represents a single hyphenation rule, that
* is, a pattern that has a number assigned to each letter and will,
* if applied, hyphenate a word at the given point. The number assigned
* to each letter and accessed by priority() is odd when hyphenation
* should occur before the letter, and only the rule with the highest
* number will be applied to any letter. */
class HyphenationRule {
private:
/* del_pre: number of bytes apply_first() erases from the end of the word
* before the hyphen is added. skip_post: value returned by apply_second(),
* the number of bytes the caller should skip after the hyphen. */
int del_pre, skip_post;
/* key: the pattern letters without the priority digits.
* insert_pre / insert_post: text appended before / after the hyphen. */
std::string key, insert_pre, insert_post;
/* One priority per key position; indexed by priority(). */
std::vector<char> priorities;
/* NOTE(review): not referenced by any member function defined in this
* header -- confirm whether it is still needed. */
std::string replacement;
public:
/* HyphenationRule is constructed from a string consisting of
* letters with numbers strewn in. The numbers are the priorities.
* In addition, a / will start a non-standard hyphenation. */
HyphenationRule(std::string source_string);
/** Call this method once a hyphen would, according to its base rule,
* be placed. Returns the number of bytes that should not be
* printed afterwards.
*
* For example, when applying the rules to "example", you should
* call the rules returned by HyphenationTree or Hyphenator as
* follows:
* string word = "ex";
* rule1.apply(word, "-");
* word += "am" ;
* rule2.apply(word, "-");
* word += "ple";
*
* Watch out for non-standard rules, though. Example: "Schiffahrt"
* string word = "Schif";
* int skip = rule1.apply(word, "-");
* char *rest = "fahrt";
* word += rest+skip;
*/
int apply(std::string& word, const std::string &hyphen) const;
/** Only apply the first part, that is, up to and including the
* hyphen. */
void apply_first(std::string& word, const std::string &hyphen) const;
/** Only apply the second part, after the hyphen. */
int apply_second(std::string& word) const;
/** Returns true iff there is a priority value != 0 for this offset
* or a larger one. */
inline bool hasPriority(unsigned int offset) const
{
return priorities.size() > offset;
}
/** Returns the hyphenation priority for a hyphen preceding the byte
* at the given offset. The offset is not range-checked here; callers
* are expected to test hasPriority(offset) first. */
inline char priority(unsigned int offset) const { return priorities[offset]; }
/** Returns the pattern to match for this rule to apply. */
inline std::string &getKey() { return key; }
/** Returns the number of bytes that will additionally be needed
* in front of the hyphen if this rule is applied. 0 for standard
* hyphenation, 1 for Schiff-fahrt. */
int spaceNeededPreHyphen() const
{
return insert_pre.size() - del_pre;
}
/** Returns true iff this rule is not a standard hyphenation rule. */
bool isNonStandard() const
{
return del_pre != 0 || skip_post != 0 ||
(!insert_pre.empty()) || (!insert_post.empty());
}
/** Only needed for libhnj implementation:
* Returns a malloc()-allocated char array consisting of the full
* replacement of this rule, with a = between the parts. For example,
* for Schiffahrt -> Schiff-fahrt this yields 'ff=' or 'ff=f',
* depending on implementation. */
- std::auto_ptr<char> replacement_string() const;
+ char* replacement_string() const;
/** Only needed for libhnj implementation:
* Get the offset at which the hyphen will end up compared to a
* standard rule. 0 for standard rules, Schiff-fahrt would yield 1. */
int getHyphenOffset() const { return insert_pre.size() - del_pre; }
/** Only needed for libhnj implementation:
* Returns the total number of bytes that need to be cut out
* before the replacement_string() should be inserted. */
int getTotalCutout() const { return skip_post + del_pre; }
};
}
#endif
diff --git a/src/HyphenationTree.cpp b/src/HyphenationTree.cpp
index e180799..d4368b6 100644
--- a/src/HyphenationTree.cpp
+++ b/src/HyphenationTree.cpp
@@ -1,239 +1,239 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
/* ------------- Implementation for HyphenationTree.h ---------------- */
#include "HyphenationTree.h"
#include "UTF8Functions.h"
#include <iostream>
using namespace std;
using namespace Hyphenate;
/* The HyphenationNode is a tree node for the hyphenation search tree. It
* represents the matching state after a single character; if there is a
* pattern that ends with that particular character, the hyphenation_pattern
* is set to non-NULL. The jump_table links to the children of that node,
* indexed by letters. */
class Hyphenate::HyphenationNode {
public:
/* Maps one byte of the key to the child node reached through it. */
typedef std::map<char, HyphenationNode*> JumpTable;
/* Table of children. The nodes are owned by this node and deleted in
* the destructor. */
JumpTable jump_table;
/* Hyphenation pattern associated with the full path to this node;
* empty if no pattern ends here. */
- std::auto_ptr<HyphenationRule> hyphenation_pattern;
+ std::unique_ptr<HyphenationRule> hyphenation_pattern;
HyphenationNode() {}
~HyphenationNode() {
/* The destructor has to destroy all children. */
for (JumpTable::iterator i = jump_table.begin();
i != jump_table.end(); i++)
delete i->second;
}
/** Find a particular jump table entry, or NULL if there is
* none for that letter. */
inline const HyphenationNode *find(char arg) const {
JumpTable::const_iterator i = jump_table.find(arg);
if (i != jump_table.end()) return i->second; else return NULL;
}
/** Find a particular jump table entry, or NULL if there is none
* for that letter. */
inline HyphenationNode *find(char arg) {
JumpTable::iterator i = jump_table.find(arg);
if (i != jump_table.end()) return i->second; else return NULL;
}
/** Insert a particular hyphenation pattern into this
* hyphenation subtree.
* \param id The remaining, NUL-terminated part of the key that still
* has to be matched below this node.
* \param pattern The rule to store at the node where id is used up.
*/
void insert(const char *id,
- std::auto_ptr<HyphenationRule> pattern);
+ std::unique_ptr<HyphenationRule> pattern);
/** Apply all patterns for that subtree.
* \param priority_buffer Highest priority seen so far per position;
* updated in place.
* \param rule_buffer Rule responsible for each position; updated in
* place.
* \param to_match NUL-terminated rest of the word to match. */
void apply_patterns(
char *priority_buffer,
const HyphenationRule ** rule_buffer,
const char *to_match) const;
};
/* Creates an empty tree: a fresh root node without patterns, and safe
 * areas of one character at the start and at the end of a word. */
Hyphenate::HyphenationTree::HyphenationTree()
    : root(new HyphenationNode()),
      start_safe(1),
      end_safe(1)
{
}
/* Releases the root node; the node's own destructor deletes its
 * children in turn. */
Hyphenate::HyphenationTree::~HyphenationTree()
{
    delete root;
}
/* Inserts a rule into the tree under the lower-cased form of its key.
 * Ownership of the rule is handed on to the root node. */
-void Hyphenate::HyphenationTree::insert(auto_ptr<HyphenationRule> pattern) {
+void Hyphenate::HyphenationTree::insert(unique_ptr<HyphenationRule> pattern) {
/* Convert our key to lower case to ease matching. */
/* Despite its name, upperCaseKey is the key exactly as stored in the
 * rule; only lowerCaseKey has been case-folded. */
const std::string& upperCaseKey = pattern->getKey();
const size_t m = upperCaseKey.size();
std::string lowerCaseKey;
/* Decode the key character by character, lower-case each character and
 * re-encode it as UTF-8 into lowerCaseKey. */
U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(upperCaseKey, i, m, ch, REPLACEMENT_CHARACTER);
ch = utf32ToLower(ch);
U8_ENCODE(ch, lowerCaseKey.push_back);
U8STRING_FOR_EACH_CHARACTER_DO_END();
- root->insert(lowerCaseKey.c_str(), pattern);
+ root->insert(lowerCaseKey.c_str(), std::move(pattern));
}
/* Recursively walks (and, where necessary, creates) the path spelled by
 * key_string and stores pattern at the node where the string is used up.
 * A pattern already stored at that node is replaced. */
void HyphenationNode::insert(const char* key_string,
- auto_ptr<HyphenationRule> pattern)
+ unique_ptr<HyphenationRule> pattern)
{
/* Is this the terminal node for that pattern? */
if (key_string[0] == 0) {
/* If we descended the tree all the way to the last letter, we can now
* write the pattern into this node. */
- hyphenation_pattern.reset(pattern.release());
+ hyphenation_pattern = std::move(pattern);
} else {
/* If not, however, we make sure that the branch for our letter exists
* and descend. */
char key = key_string[0];
/* Ensure presence of a branch for that letter. */
HyphenationNode *p = find(key);
if (!p) {
p = new HyphenationNode();
jump_table.insert(pair<char, HyphenationNode*>(key, p));
}
/* Go to the next letter and descend. */
- p->insert(key_string + 1, pattern);
+ p->insert(key_string + 1, std::move(pattern));
}
}
void Hyphenate::HyphenationNode::apply_patterns(
char *priority_buffer,
const HyphenationRule ** rule_buffer,
const char *to_match) const
{
/* First of all, if we can descend further into the tree (that is,
* there is an input char left and there is a branch in the tree),
* do so. */
char key = to_match[0];
if (key != 0) {
const HyphenationNode *next = find(key);
if (next != NULL)
next->apply_patterns(priority_buffer, rule_buffer, to_match + 1);
}
/* Now, if we have a pattern at this point in the tree, it must be a good
* match. Apply the pattern. */
const HyphenationRule* hyp_pat = hyphenation_pattern.get();
if (hyp_pat != NULL)
for (int i = 0; hyp_pat->hasPriority(i); i++)
if (priority_buffer[i] < hyp_pat->priority(i)) {
rule_buffer[i] = (hyp_pat->priority(i) % 2 == 1) ? hyp_pat : NULL;
priority_buffer[i] = hyp_pat->priority(i);
}
}
/* Convenience overload: applies the patterns to the whole word by
 * forwarding to the two-argument overload with stop_at = string::npos. */
-auto_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
+unique_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
(const string &word) const
{
return applyPatterns(word, string::npos);
}
/* Runs all patterns of the tree over word and returns a vector with
 * word.size() entries. Entry k is the rule to apply in front of byte k of
 * word, or NULL if no hyphenation is placed there. Matching starts only at
 * byte offsets up to stop_at. The returned pointers refer to rules stored
 * in this tree. */
-auto_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
+unique_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
(const string &word, size_t stop_at) const
{
/* Prepend and append a . to the string (word start and end), and convert
* all characters to lower case to ease matching. */
std::string w = ".";
{
const size_t m = word.size();
U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(word, i, m, ch, REPLACEMENT_CHARACTER);
ch = utf32ToLower(ch);
U8_ENCODE(ch, w.push_back);
U8STRING_FOR_EACH_CHARACTER_DO_END();
}
w += ".";
/* Vectors for priorities and rules. */
/* NOTE(review): the sizes w.size() + 2 and w.size() + 1 presumably leave
* room for patterns carrying one more priority than they have letters --
* confirm. */
vector<char> pri(w.size() + 2, 0);
vector<const HyphenationRule*> rules(w.size() + 1, NULL);
/* For each suffix of the expanded word, search all matching prefixes.
* That way, each possible match is found. Note the pointer arithmetic
* in the first and second argument. */
for (unsigned int i = 0; i < w.size() - 1 && i <= stop_at; i++)
root->apply_patterns((&pri[i]), (&rules[i]), w.c_str() + i);
/* Copy the results to a shorter vector. */
- auto_ptr<vector<const HyphenationRule*> > output_rules(
+ unique_ptr<vector<const HyphenationRule*> > output_rules(
new vector<const HyphenationRule*>(word.size(), NULL));
/* We honor the safe areas at the start and end of each word here. */
/* Please note that the incongruence between start and end is due
* to the fact that hyphenation happens _before_ each character. */
/* Bytes of the form 10xxxxxx are UTF-8 continuation bytes and do not
* count as characters of the safe area. */
unsigned int ind_start = 1, ind_end = w.size() - 1;
for (unsigned int skip = 0; skip < start_safe && ind_start < w.size(); ind_start++)
if ((w[ind_start] & 0xC0) != 0x80)
skip++;
for (unsigned int skip = 0; skip < end_safe && ind_end > 0; ind_end--)
if ((w[ind_end] & 0xC0) != 0x80)
skip++;
/* NOTE(review): output_rules has word.size() entries while i is bounded
* by positions in w. With end_safe == 0, or if lower-casing changes the
* byte length of the word, i - 1 looks like it can reach word.size() --
* confirm and guard if so. */
for (unsigned int i = ind_start; i <= ind_end; i++)
(*output_rules)[i - 1] = rules[i];
return output_rules;
}
/* Reads whitespace-separated words from the stream until it is exhausted.
 * The first two purely numeric words set start_safe and end_safe; every
 * other non-empty word is turned into a HyphenationRule and inserted. */
void HyphenationTree::loadPatterns(istream &i) {
string pattern;
/* The input is a file with whitespace-separated words.
* The first numerical-only word we encountered denotes the safe start,
* the second the safe end area. */
char ch;
/* numeric: the word collected so far consists of digits only.
* num_field: how many safe-area numbers have been consumed already. */
bool numeric = true;
int num_field = 0;
while (i.get(ch)) {
if (ch == '\n' || ch == '\r' || ch == '\t' || ch == ' ') {
/* The output operation. */
if (pattern.size() && numeric && num_field <= 1) {
/* NOTE(review): atoi() is used although this file shows no direct
* include of <stdlib.h>/<cstdlib> -- presumably it arrives through
* another header; confirm. */
((num_field == 0) ? start_safe : end_safe) = atoi(pattern.c_str());
num_field++;
} else if (pattern.size()) {
insert(
- auto_ptr<HyphenationRule>(new HyphenationRule(pattern)));
+ unique_ptr<HyphenationRule>(new HyphenationRule(pattern)));
}
/* Reinitialize state. */
pattern.clear();
numeric = true;
} else {
/* This rule catches all other (mostly alpha, but probably UTF-8)
* characters. The byte is appended to the pattern unchanged. */
pattern += ch;
if (ch < '0' || ch > '9') numeric = false;
}
}
/* A last word that is not followed by whitespace is always inserted as
* a pattern, even if it is numeric. */
if (pattern.size())
- insert(auto_ptr<HyphenationRule>(new HyphenationRule(pattern)));
+ insert(unique_ptr<HyphenationRule>(new HyphenationRule(pattern)));
}
diff --git a/src/HyphenationTree.h b/src/HyphenationTree.h
index e387a93..21d360c 100644
--- a/src/HyphenationTree.h
+++ b/src/HyphenationTree.h
@@ -1,73 +1,73 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATION_TREE_H
#define HYPHENATION_TREE_H
#include <string>
#include <memory>
#include <vector>
#include <map>
#include "HyphenationRule.h"
namespace Hyphenate {
/* Defined in HyphenationTree.cpp; only used through pointers here. */
class HyphenationNode;
/**
* \class HyphenationTree
* \brief The root for a tree of HyphenationNodes.
*/
class HyphenationTree {
private:
/* Root of the search tree; created in the constructor and deleted in
* the destructor. */
HyphenationNode* root;
/* Number of characters at the start / end of a word in front of which
* no hyphenation is reported. */
unsigned int start_safe, end_safe;
public:
/** The constructor constructs an empty tree, which can be filled
* either by reading a whole file of patterns with
* <code>loadPatterns</code> or by <code>insert</code>. */
HyphenationTree();
~HyphenationTree();
/** Read the istream while it is not empty, cutting it into words
* and constructing patterns from it. The first lone number
* encountered will be the safe start, the second the safe end. */
void loadPatterns(std::istream &source);
/** Insert a particular hyphenation pattern into the hyphenation tree.
* \param pattern The rule to insert; the tree takes ownership of it.
*/
- void insert(std::auto_ptr<HyphenationRule> pattern);
+ void insert(std::unique_ptr<HyphenationRule> pattern);
/** Apply all patterns for that hyphenation tree to the supplied
* string. Return an array with Hyphenation rules that should be
* applied before the addition of the next letter of the string.
* The pointers in that vector point into this tree. */
- std::auto_ptr<std::vector<const HyphenationRule*> > applyPatterns
+ std::unique_ptr<std::vector<const HyphenationRule*> > applyPatterns
(const std::string &word) const;
/** Like applyPatterns(word), but will only hyphenate up to the letter
* end_at. */
- std::auto_ptr<std::vector<const HyphenationRule*> > applyPatterns
+ std::unique_ptr<std::vector<const HyphenationRule*> > applyPatterns
(const std::string &word, size_t end_at) const;
};
}
#endif
diff --git a/src/Hyphenator.cpp b/src/Hyphenator.cpp
index b5e5da4..c0808b5 100644
--- a/src/Hyphenator.cpp
+++ b/src/Hyphenator.cpp
@@ -1,262 +1,262 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#include "Hyphenator.h"
#include <iostream>
#include <fstream>
#include <vector>
#include <map>
#include <memory>
#include <ctype.h>
#include <stdlib.h>
#include "HyphenationRule.h"
#include "HyphenationTree.h"
#include "UTF8Functions.h"
#define UTF8_MAX 6
using namespace std;
using namespace Hyphenate;
/** The hyphenation table parser. */
/* Opens filename for reading, loads all patterns found in it into a new
 * HyphenationTree and returns that tree.
 * NOTE(review): the stream is not checked after opening; presumably a
 * missing file simply yields an empty tree -- confirm that this is the
 * intended behaviour. */
-static auto_ptr<HyphenationTree> read_hyphenation_table(const char *filename) {
+static unique_ptr<HyphenationTree> read_hyphenation_table(const char *filename) {
ifstream i(filename, fstream::in);
- auto_ptr<HyphenationTree> output(new HyphenationTree());
+ unique_ptr<HyphenationTree> output(new HyphenationTree());
output->loadPatterns(i);
return output;
}
/** Build a hyphenator from the patterns in the file provided. */
/* Builds the hyphenator's dictionary from the pattern file named by
 * filename. */
Hyphenate::Hyphenator::Hyphenator(const char *filename)
    : dictionary(read_hyphenation_table(filename))
{
}
/* Nothing to do explicitly: the dictionary member cleans up after itself. */
Hyphenator::~Hyphenator()
{
}
std::string Hyphenator::hyphenate
(const std::string &word, const std::string &hyphen)
{
string result;
unsigned int word_start = -1;
/* Go through the input. All non-alpha characters are added to the
* output immediately, and words are hyphenated and then added. */
for (unsigned int i = 0; i < word.size(); i++) {
/* Skip UTF-8 tail bytes. */
if ((word[i] & 0xC0) == 0x80)
;
else {
bool isalpha = utf32IsAlpha(utf8GetCharacter(word.c_str() + i));
if (word_start == string::npos && isalpha)
word_start = i;
else if (word_start != string::npos && !isalpha) {
result +=
hyphenate_word(word.substr(word_start, i - word_start), hyphen);
word_start = string::npos;
}
}
if (word_start == string::npos)
result += word[i];
}
if (word_start != string::npos)
result += hyphenate_word(word.substr(word_start), hyphen);
return result;
}
/* Hyphenates a single word: looks up the rule for every byte position and
 * rebuilds the word with hyphen (and any non-standard replacement text)
 * inserted at each hyphenation point. */
std::string Hyphenator::hyphenate_word
(const std::string &word, const std::string &hyphen)
{
- auto_ptr<vector<const HyphenationRule*> > rules =
+ unique_ptr<vector<const HyphenationRule*> > rules =
dictionary->applyPatterns(word);
/* Build our result string. Of course, we _could_ insert characters in
* w, but that would be highly inefficient. */
string result;
/* Number of upcoming input bytes that must not be copied because a
* non-standard rule has already replaced them. */
int acc_skip = 0;
for (unsigned int i = 0; i < word.size(); i++) {
if ((*rules)[i] != NULL)
acc_skip += (*rules)[i]->apply(result, hyphen);
if (acc_skip > 0)
acc_skip--;
else
result += word[i];
}
return result;
}
/* Splits src into a first line and the remaining text.
 *
 * The split is made at whitespace if the character at position len (or
 * the one after it) is a breakable space; otherwise the function walks
 * backwards from that position looking for a hyphenation point inside a
 * word or for an earlier space.
 *
 * \param src    The UTF-8 text to split.
 * \param hyphen The string appended to the first part when the split
 *               falls inside a word.
 * \param len    Number of characters to advance from the start of src to
 *               reach the border position.
 * \returns first = the line, second = the rest of the text.
 *
 * NOTE(review): the first loop advances len characters without testing
 * for the end of src, and the declaration in Hyphenator.h defaults len to
 * string::npos -- presumably callers always pass a len within the string;
 * confirm. */
pair<std::string, std::string> Hyphenator::hyphenate_at
(const std::string &src, const std::string &hyphen, size_t len)
{
/* First of all, find the word which needs to be hyphenated. */
const char *cur = src.c_str();
for (unsigned int i = 0; i < len; i++)
cur = utf8GoToNextCharacter(cur);
/* next is cur itself if cur is a breakable space, otherwise the
* character following cur. */
const char *next = cur;
if (!utf32IsBreakableSpace(utf8GetCharacter(next)))
next = utf8GoToNextCharacter(next);
pair<string, string> result;
if (utf32IsBreakableSpace(utf8GetCharacter(next))) {
/* We are lucky: There is a space we can hyphenate at. */
/* We leave no spaces at the end of a line: */
while (utf32IsBreakableSpace(utf8GetCharacter(cur)))
cur = utf8GoToPrevCharacter(cur);
/* Note: this local len shadows the parameter of the same name. */
int len = cur - src.c_str() + 1;
result.first = src.substr(0, len);
/* Neither do we leave spaces at the beginning of the next. */
while (utf32IsBreakableSpace(utf8GetCharacter(next)))
next = utf8GoToNextCharacter(next);
result.second = src.substr(next - src.c_str());
} else {
/* We can hyphenate at hyphenation points in words or at spaces, whatever
* comes earlier. We will check all words here in the loop. */
/* border remembers the original position; a hyphenated first part must
* not extend beyond it. */
const char *border = cur;
while (true) {
/* Find the start of a word first. */
bool in_word = utf32IsAlpha(utf8GetCharacter(cur));
const char *word_start = NULL;
while (cur > src.c_str()) {
cur = utf8GoToPrevCharacter(cur);
int ch = utf8GetCharacter(cur);
if (in_word && (!utf32IsAlpha(ch))) {
/* If we have a word, try hyphenating it.*/
word_start = utf8GoToNextCharacter(cur);
break;
} else if (utf32IsBreakableSpace(ch)) {
break;
} else if (!in_word && utf32IsAlpha(ch))
in_word = true;
if (cur == src.c_str() && in_word)
word_start = cur;
}
/* There are two reasons why we may have left the previous loop with-
* out result:
* Either because our word goes all the way to the first character,
* or because we found whitespace. */
/* In the first case, there is nothing really hyphenateable. */
if (word_start != NULL) {
/* We have the start of a word, now look for the character after
* the end. */
const char *word_end = word_start;
while (utf32IsAlpha(utf8GetCharacter(word_end)))
word_end = utf8GoToNextCharacter(word_end);
/* Build the substring consisting of the word. */
string word;
for (const char *i = word_start; i < word_end; i++)
word += *i;
/* Hyphenate the word. */
- auto_ptr<vector<const HyphenationRule*> > rules =
+ unique_ptr<vector<const HyphenationRule*> > rules =
dictionary->applyPatterns(word);
/* Determine the index of the latest hyphenation that will still
* fit. */
int latest_possible_hyphenation = -1;
int earliest_hyphenation = -1;
for (int i = 0; i < (int)rules->size(); i++)
if ((*rules)[i] != NULL) {
if (earliest_hyphenation == -1)
earliest_hyphenation = i;
/* The first part, including inserted text and the hyphen, has to end
* at or before the border. */
if (word_start + i +
(*rules)[i]->spaceNeededPreHyphen() + hyphen.length()
<= border)
{
if (i > latest_possible_hyphenation) {
latest_possible_hyphenation = i;
}
} else
break;
}
/* Check whether there is any space between the start of the text and
* the start of this word that could serve as a break instead. */
bool have_space = false;
for (const char *i = src.c_str(); i <= word_start;
i = utf8GoToNextCharacter(i))
if (utf32IsBreakableSpace(utf8GetCharacter(i))) {
have_space = true;
break;
}
/* Without any earlier space, fall back to the first hyphenation point
* even though it does not fit. */
if (latest_possible_hyphenation == -1 && !have_space)
latest_possible_hyphenation = earliest_hyphenation;
/* Apply the best hyphenation, if any. */
if (latest_possible_hyphenation >= 0) {
int i = latest_possible_hyphenation;
result.first = src.substr(0, word_start - src.c_str() + i);
(*rules)[i]->apply_first(result.first, hyphen);
int skip = (*rules)[i]->apply_second(result.second);
const char *after_hyphen = word_start + i + skip;
result.second += string(after_hyphen);
break;
}
}
if (cur == src.c_str()) {
/* We cannot hyphenate at all, so leave the first block standing
* and move to its end. */
const char *eol = cur;
while (*eol != 0 && !utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToNextCharacter(eol);
result.first = src.substr(0, eol - src.c_str() + 1);
while (*eol != 0 && utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToNextCharacter(eol);
result.second = string(eol);
break;
} else if (utf32IsBreakableSpace(utf8GetCharacter(cur))) {
/* eol is the end of the previous line, bol the start of the
* next. */
const char *eol = cur, *bol = cur;
while (utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToPrevCharacter(eol);
while (utf32IsBreakableSpace(utf8GetCharacter(bol)))
bol = utf8GoToNextCharacter(bol);
result.first = src.substr(0, eol - src.c_str() + 1);
result.second = string(bol);
break;
}
}
}
return result;
}
/* Returns the per-position rule vector for word without building a
 * hyphenated string; this simply forwards to the dictionary's
 * applyPatterns(). */
-std::auto_ptr<std::vector<const HyphenationRule*> >
+std::unique_ptr<std::vector<const HyphenationRule*> >
Hyphenate::Hyphenator::applyHyphenationRules(const std::string& word)
{
return dictionary->applyPatterns(word);
}
diff --git a/src/Hyphenator.h b/src/Hyphenator.h
index 2157d0b..bfff898 100644
--- a/src/Hyphenator.h
+++ b/src/Hyphenator.h
@@ -1,103 +1,103 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATE_HYPHENATOR_H
#define HYPHENATE_HYPHENATOR_H
#include <map>
#include <string>
#include <memory>
#include <vector>
namespace Hyphenate {
/* Only forward declarations are needed here; the full definitions are
* included by Hyphenator.cpp. */
class HyphenationTree;
class HyphenationRule;
/** Hyphenates UTF-8 text using the patterns loaded from a pattern file. */
class Hyphenator {
private:
/* The pattern tree all lookups go through; owned by this object. */
- std::auto_ptr<HyphenationTree> dictionary;
+ std::unique_ptr<HyphenationTree> dictionary;
/* Hyphenates one word that consists of alphabetic characters only. */
std::string hyphenate_word
(const std::string &word, const std::string &hyphen);
public:
/** Build a hyphenator from the patterns in the file provided. */
Hyphenator(const char *filename);
/** Destructor. */
~Hyphenator();
/** The actual workhorse. You'll want to call this function once
* for each word (NEW: or complete string, not only word. The library
* will do the word-splitting for you) you want hyphenated.
*
* Usage example:
* Hyphenate::Hyphenator hyphenator(Language("de-DE"));
* hyphenator.hyphenate("Schiffahrt");
*
* yields "Schiff-fahrt", while
*
* Hyphenate::Hyphenator hyphenator(Language("en"));
* hyphenator.hyphenate("example", "­");
*
* yields "ex­am­ple".
*
* NOTE(review): the examples construct the Hyphenator from
* Language(...), but the only constructor declared in this class takes
* the name of a pattern file -- the examples look outdated; confirm.
*
* \param word A UTF-8 encoded word or text to be hyphenated.
* \param hyphen The string to put at each possible
* hyphenation point. The default is an ASCII dash.
*/
std::string hyphenate
(const std::string &word,
const std::string &hyphen = "-");
/** Find a single hyphenation point in the string so that the first
* part (including a hyphen) will be shorter or equal in length
* to the parameter len. If this is not possible, choose the shortest
* possible string.
*
* The first element is the result, the second element the rest of
* the string.
*
* Example: To format a piece of text to width 60, use the following
* loop:
* string rest = text;
* string result = "";
* while ( ! rest.empty() ) {
* pair<string,string> p = your_hyphenator.hyphenate_at(rest);
* result += p.first + "\n"
* rest = p.second;
* }
*
* NOTE(review): the example does not pass the width, so hyphen and len
* take their defaults ("-" and std::string::npos) -- presumably the call
* was meant to be hyphenate_at(rest, "-", 60); confirm.
**/
std::pair<std::string, std::string> hyphenate_at
(const std::string &word,
const std::string &hyphen = "-",
size_t len = std::string::npos);
/** Just apply the hyphenation patterns to the word, but don't
* hyphenate anything.
*
* \returns A vector with the same size as the word with a non-NULL
* entry for every hyphenation point. */
- std::auto_ptr<std::vector<const HyphenationRule*> >
+ std::unique_ptr<std::vector<const HyphenationRule*> >
applyHyphenationRules(const std::string& word);
};
}
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, May 8, 8:26 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
59840
Default Alt Text
(32 KB)
Attached To
Mode
R79 meandmyshadow
Attached
Detach File
Event Timeline