Page MenuHomePhabricator (Chris)

No OneTemporary

Authored By
Unknown
Size
32 KB
Referenced Files
None
Subscribers
None
diff --git a/src/HyphenationRule.cpp b/src/HyphenationRule.cpp
index e203f42..d1aadef 100644
--- a/src/HyphenationRule.cpp
+++ b/src/HyphenationRule.cpp
@@ -1,108 +1,108 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
/* This source file provides code for the HyphenationRule class which
* is documented in HyphenationRule.h */
#include "HyphenationRule.h"
#include <string.h>
using namespace std;
/* Builds a rule from a pattern string.
 *
 * Up to an optional '/', every digit run is read as a decimal priority and
 * every other byte is appended to the key; priorities[k] ends up holding
 * the priority that was written in front of key byte k. Trailing zero
 * priorities are dropped, so a pattern without any non-zero priority
 * leaves the priority list empty.
 *
 * The text after a '/' describes a non-standard hyphenation in the form
 *    insert_pre=insert_post,start,cut
 * where start and cut are optional decimal numbers. start is treated as a
 * 1-based position in the key; a missing or zero start is read as 1. */
Hyphenate::HyphenationRule::HyphenationRule(std::string dpattern)
: del_pre(0), skip_post(0)
{
    int priority = 0;
    unsigned int i;

    for (i = 0; i < dpattern.size() && dpattern[i] != '/'; i++) {
        if (dpattern[i] >= '0' && dpattern[i] <= '9') {
            priority = 10 * priority + dpattern[i] - '0';
        } else {
            key += dpattern[i];
            priorities.push_back(priority);
            priority = 0;
        }
    }

    /* Complete and simplify the array. The emptiness check matters: a
     * pattern whose priorities are all zero would otherwise pop every
     * element and then call back() on an empty vector. */
    priorities.push_back(priority);
    while (!priorities.empty() && priorities.back() == 0)
        priorities.pop_back();

    /* Now check for nonstandard hyphenation. First, parse it. */
    if (i < dpattern.size() && dpattern[i] == '/') {
        i += 1; /* Ignore the /. */

        int field = 1;
        unsigned int start = 0, cut = 0;
        for (; i < dpattern.size(); i++) {
            if (field == 1 && dpattern[i] == '=')
                field++;
            else if (field >= 2 && field <= 3 && dpattern[i] == ',')
                field++;
            else if (field == 4 && (dpattern[i] < '0' || dpattern[i] > '9'))
                break;
            else if (field == 1)
                insert_pre += dpattern[i];
            else if (field == 2)
                insert_post += dpattern[i];
            else if (field == 3)
                start = start * 10 + dpattern[i] - '0';
            else if (field == 4)
                cut = cut * 10 + dpattern[i] - '0';
        }

        if (field < 4) /* There was no fourth field */
            cut = key.size() - start;
        /* The loop below reads priorities[j - 1], so start must be at
         * least 1. An explicit start of 0 is therefore handled like a
         * missing start field instead of indexing in front of the array. */
        if (field < 3 || start == 0)
            start = 1;

        /* Split the cut region at the first odd priority: the bytes in
         * front of it are deleted before the hyphen (del_pre), the rest
         * is skipped after it (skip_post). */
        skip_post = cut;
        for (unsigned int j = start; j < start + cut && j < priorities.size(); j++) {
            if (priorities[j - 1] % 2 == 1) break;
            del_pre++; skip_post--;
        }
    }
}
/* Applies both halves of the rule to word: first everything up to and
 * including the hyphen, then the part after it. Returns the value of
 * apply_second(). */
int Hyphenate::HyphenationRule::apply(string& word, const string &hyph) const
{
    this->apply_first(word, hyph);
    const int bytes_to_skip = this->apply_second(word);
    return bytes_to_skip;
}
/* First half of the rule: drops the last del_pre bytes of word (if any),
 * then appends insert_pre followed by the hyphen string. */
void Hyphenate::HyphenationRule::apply_first(string& word, const string &hyph)
const
{
    if (del_pre > 0) {
        /* Erase from this position to the end of the word. */
        word.erase(word.size() - del_pre);
    }
    word.append(insert_pre);
    word.append(hyph);
}
/* Second half of the rule: appends insert_post to word and returns
 * skip_post. */
int Hyphenate::HyphenationRule::apply_second(string& word) const
{
    word.append(insert_post);
    return skip_post;
}
/* Builds the string insert_pre + "=" + insert_post and returns a
 * NUL-terminated copy of it in a malloc()-allocated buffer. After this
 * change a raw pointer is returned, so nothing in this function releases
 * the buffer; the caller has to free() it.
 * NOTE(review): the result of malloc() is not checked before strcpy(). */
-auto_ptr<char> Hyphenate::HyphenationRule::replacement_string() const {
+char* Hyphenate::HyphenationRule::replacement_string() const {
string s = (insert_pre + "=" + insert_post);
/* One extra byte for the terminating NUL written by strcpy(). */
char *r = (char *)malloc((s.size() + 1) * sizeof(char));
strcpy(r, s.c_str());
- return auto_ptr<char>(r);
+ return r;
}
diff --git a/src/HyphenationRule.h b/src/HyphenationRule.h
index ec3ef96..fd060f7 100644
--- a/src/HyphenationRule.h
+++ b/src/HyphenationRule.h
@@ -1,120 +1,119 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATION_RULE_H
#define HYPHENATION_RULE_H
#include <string>
-#include <memory>
#include <vector>
namespace Hyphenate {
/** The HyphenationRule class represents a single Hyphenation Rule, that
* is, a pattern that has a number assigned to each letter and will,
* if applied, hyphenate a word at the given point. The number assigned
* to each letter and accessed by priority() is odd when hyphenation
* should occur before the letter, and only the rule with the highest
* number will be applied to any letter. */
class HyphenationRule {
private:
/* del_pre: number of bytes apply_first() erases from the end of the word
* before it appends insert_pre and the hyphen.
* skip_post: value returned by apply_second() and apply(). */
int del_pre, skip_post;
/* key: the pattern bytes without the priority digits.
* insert_pre / insert_post: text appended before / after the hyphen. */
std::string key, insert_pre, insert_post;
/* priorities[k] is the priority written in front of key byte k; trailing
* zero entries are removed by the constructor. */
std::vector<char> priorities;
/* NOTE(review): not referenced by any code visible in this change. */
std::string replacement;
public:
/** HyphenationRule is constructed from a string consisting of
* letters with numbers strewn in. The numbers are the priorities.
* In addition, a / will start a non-standard hyphenation. */
HyphenationRule(std::string source_string);
/** Call this method once a hyphen would, according to its base rule,
* be placed. Returns the number of bytes that should not be
* printed afterwards.
*
* For example, when applying the rules to "example", you should
* call the rules returned by HyphenationTree or Hyphenator as
* follows:
* string word = "ex";
* rule1.apply(word, "-");
* word += "am" ;
* rule2.apply(word, "-");
* word += "ple";
*
* Watch out for non-standard rules, though. Example: "Schiffahrt"
* string word = "Schif";
* int skip = rule1.apply(word, "-");
* char *rest = "fahrt";
* word += rest+skip;
*/
int apply(std::string& word, const std::string &hyphen) const;
/** Only apply the first part, that is, up to and including the
* hyphen. */
void apply_first(std::string& word, const std::string &hyphen) const;
/** Only apply the second part, after the hyphen. */
int apply_second(std::string& word) const;
/** Returns true iff there is a priority value != 0 for this offset
* or a larger one. */
inline bool hasPriority(unsigned int offset) const
{
return priorities.size() > offset;
}
/** Returns the hyphenation priority for a hyphen preceding the byte
* at the given offset. The offset is not range-checked here; check
* hasPriority(offset) first. */
inline char priority(unsigned int offset) const { return priorities[offset]; }
/** Returns the pattern to match for this rule to apply. The reference
* is non-const, so the caller can modify the stored key through it. */
inline std::string &getKey() { return key; }
/** Returns the amount of bytes that will additionally be needed
* in front of the hyphen if this rule is applied. 0 for standard
* hyphenation, 1 for Schiff-fahrt. */
int spaceNeededPreHyphen() const
{
return insert_pre.size() - del_pre;
}
/** Returns true iff this rule is not a standard hyphenation rule. */
bool isNonStandard() const
{
return del_pre != 0 || skip_post != 0 ||
(!insert_pre.empty()) || (!insert_post.empty());
}
/** Only needed for libhnj implementation:
* Returns a malloc()-allocated char array consisting of the full
* replacement of this rule, with a = between the parts. For example,
* for Schiffahrt -> Schiff-fahrt this yields 'ff=' or 'ff=f',
* depending on implementation. The array is returned as a raw
* pointer; the caller has to free() it. */
- std::auto_ptr<char> replacement_string() const;
+ char* replacement_string() const;
/** Only needed for libhnj implementation:
* Get the offset at which the hyphen will end up compared to a
* standard rule. 0 for standard rules, Schiff-fahrt would yield 1.
* Computes the same value as spaceNeededPreHyphen(). */
int getHyphenOffset() const { return insert_pre.size() - del_pre; }
/** Only needed for libhnj implementation:
* Returns the total number of bytes that need to be cut out
* before the replacement_string() should be inserted. */
int getTotalCutout() const { return skip_post + del_pre; }
};
}
#endif
diff --git a/src/HyphenationTree.cpp b/src/HyphenationTree.cpp
index e180799..d4368b6 100644
--- a/src/HyphenationTree.cpp
+++ b/src/HyphenationTree.cpp
@@ -1,239 +1,239 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
/* ------------- Implementation for HyphenationTree.h ---------------- */
#include "HyphenationTree.h"
#include "UTF8Functions.h"
#include <iostream>
using namespace std;
using namespace Hyphenate;
/* The HyphenationNode is a tree node for the hyphenation search tree. It
* represents the matching state after a single character; if there is a
* pattern that ends with that particular character, the hyphenation_pattern
* is set to non-NULL. The jump_table links to the children of that node,
* indexed by letters. */
class Hyphenate::HyphenationNode {
public:
/* Children are keyed by a single char, i.e. by one byte of the key. */
typedef std::map<char, HyphenationNode*> JumpTable;
/* Table of children. The nodes are owned by this node and deleted in
* the destructor. */
JumpTable jump_table;
/* Hyphenation pattern associated with the full path to this node;
* empty when no pattern ends here. */
- std::auto_ptr<HyphenationRule> hyphenation_pattern;
+ std::unique_ptr<HyphenationRule> hyphenation_pattern;
HyphenationNode() {}
~HyphenationNode() {
/* The destructor has to destroy all children. */
for (JumpTable::iterator i = jump_table.begin();
i != jump_table.end(); i++)
delete i->second;
}
/** Find a particular jump table entry, or NULL if there is
* none for that letter. */
inline const HyphenationNode *find(char arg) const {
JumpTable::const_iterator i = jump_table.find(arg);
if (i != jump_table.end()) return i->second; else return NULL;
}
/** Find a particular jump table entry, or NULL if there is none
* for that letter. */
inline HyphenationNode *find(char arg) {
JumpTable::iterator i = jump_table.find(arg);
if (i != jump_table.end()) return i->second; else return NULL;
}
/** Insert a particular hyphenation pattern into this
* hyphenation subtree.
* \param id The remaining, NUL-terminated key bytes to descend along.
* \param pattern The rule to store at the node where id ends;
* ownership is transferred to the tree.
*/
void insert(const char *id,
- std::auto_ptr<HyphenationRule> pattern);
+ std::unique_ptr<HyphenationRule> pattern);
/** Apply all patterns for that subtree.
* \param priority_buffer Highest priority seen so far per position;
* raised in place.
* \param rule_buffer Rule responsible for each position, or NULL;
* updated together with priority_buffer.
* \param to_match NUL-terminated input still to be matched. */
void apply_patterns(
char *priority_buffer,
const HyphenationRule ** rule_buffer,
const char *to_match) const;
};
/* Creates an empty tree: a fresh root node and a safe area of one at both
 * the start and the end of a word. */
Hyphenate::HyphenationTree::HyphenationTree()
    : root(new HyphenationNode()),
      start_safe(1),
      end_safe(1)
{
}
/* Deleting the root tears down the whole tree, because every node deletes
 * its own children. */
Hyphenate::HyphenationTree::~HyphenationTree()
{
    delete root;
}
/* Stores the rule in the tree under a lower-cased copy of its key and
 * hands ownership of the rule over to the node it ends up in. */
-void Hyphenate::HyphenationTree::insert(auto_ptr<HyphenationRule> pattern) {
+void Hyphenate::HyphenationTree::insert(unique_ptr<HyphenationRule> pattern) {
/* Convert our key to lower case to ease matching. */
/* Despite its name, upperCaseKey is simply the rule's key as given. */
const std::string& upperCaseKey = pattern->getKey();
const size_t m = upperCaseKey.size();
std::string lowerCaseKey;
/* The U8* macros and utf32ToLower are not defined in this file
* (presumably UTF8Functions.h); they appear to visit the characters of
* upperCaseKey one by one as ch and to append the encoded lower-case
* form to lowerCaseKey -- TODO confirm against their definitions. */
U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(upperCaseKey, i, m, ch, REPLACEMENT_CHARACTER);
ch = utf32ToLower(ch);
U8_ENCODE(ch, lowerCaseKey.push_back);
U8STRING_FOR_EACH_CHARACTER_DO_END();
/* The key was fully read above, so the rule can be moved away now. */
- root->insert(lowerCaseKey.c_str(), pattern);
+ root->insert(lowerCaseKey.c_str(), std::move(pattern));
}
/* Recursively walks key_string one byte per tree level, creating missing
 * child nodes on the way, and stores the rule in the node reached when
 * key_string is exhausted. A rule already stored there is replaced. */
void HyphenationNode::insert(const char* key_string,
- auto_ptr<HyphenationRule> pattern)
+ unique_ptr<HyphenationRule> pattern)
{
/* Is this the terminal node for that pattern? */
if (key_string[0] == 0) {
/* If we descended the tree all the way to the last letter, we can now
* write the pattern into this node. */
- hyphenation_pattern.reset(pattern.release());
+ hyphenation_pattern = std::move(pattern);
} else {
/* If not, however, we make sure that the branch for our letter exists
* and descend. */
char key = key_string[0];
/* Ensure presence of a branch for that letter. */
HyphenationNode *p = find(key);
if (!p) {
p = new HyphenationNode();
jump_table.insert(pair<char, HyphenationNode*>(key, p));
}
/* Go to the next letter and descend. */
- p->insert(key_string + 1, pattern);
+ p->insert(key_string + 1, std::move(pattern));
}
}
void Hyphenate::HyphenationNode::apply_patterns(
char *priority_buffer,
const HyphenationRule ** rule_buffer,
const char *to_match) const
{
/* First of all, if we can descend further into the tree (that is,
* there is an input char left and there is a branch in the tree),
* do so. */
char key = to_match[0];
if (key != 0) {
const HyphenationNode *next = find(key);
if (next != NULL)
next->apply_patterns(priority_buffer, rule_buffer, to_match + 1);
}
/* Now, if we have a pattern at this point in the tree, it must be a good
* match. Apply the pattern. */
const HyphenationRule* hyp_pat = hyphenation_pattern.get();
if (hyp_pat != NULL)
for (int i = 0; hyp_pat->hasPriority(i); i++)
if (priority_buffer[i] < hyp_pat->priority(i)) {
rule_buffer[i] = (hyp_pat->priority(i) % 2 == 1) ? hyp_pat : NULL;
priority_buffer[i] = hyp_pat->priority(i);
}
}
/* Convenience overload: applies the patterns to the whole word by
 * forwarding to the two-argument overload with string::npos as limit. */
-auto_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
+unique_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
(const string &word) const
{
return applyPatterns(word, string::npos);
}
/* Runs all patterns of the tree over word and returns a newly allocated
 * vector with word.size() entries. An entry is either NULL or a rule
 * stored in this tree. Pattern matching starts only at offsets
 * <= stop_at of the dot-wrapped, lower-cased word. Entries inside the
 * safe areas at the start and end of the word stay NULL. */
-auto_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
+unique_ptr<vector<const HyphenationRule*> > HyphenationTree::applyPatterns
(const string &word, size_t stop_at) const
{
/* Prepend and append a . to the string (word start and end), and convert
* all characters to lower case to ease matching. */
std::string w = ".";
{
const size_t m = word.size();
/* The U8* macros and utf32ToLower are not defined in this file; see
* UTF8Functions.h for their exact behaviour. */
U8STRING_FOR_EACH_CHARACTER_DO_BEGIN(word, i, m, ch, REPLACEMENT_CHARACTER);
ch = utf32ToLower(ch);
U8_ENCODE(ch, w.push_back);
U8STRING_FOR_EACH_CHARACTER_DO_END();
}
w += ".";
/* Vectors for priorities and rules. They are slightly longer than w;
* apply_patterns writes to the offsets for which a matched rule has a
* priority. */
vector<char> pri(w.size() + 2, 0);
vector<const HyphenationRule*> rules(w.size() + 1, NULL);
/* For each suffix of the expanded word, search all matching prefixes.
* That way, each possible match is found. Note the pointer arithmetics
* in the first and second argument. */
for (unsigned int i = 0; i < w.size() - 1 && i <= stop_at; i++)
root->apply_patterns((&pri[i]), (&rules[i]), w.c_str() + i);
/* Copy the results to a shorter vector. */
- auto_ptr<vector<const HyphenationRule*> > output_rules(
+ unique_ptr<vector<const HyphenationRule*> > output_rules(
new vector<const HyphenationRule*>(word.size(), NULL));
/* We honor the safe areas at the start and end of each word here. */
/* Please note that the incongruence between start and end is due
* to the fact that hyphenation happens _before_ each character. */
unsigned int ind_start = 1, ind_end = w.size() - 1;
/* Bytes of the form 10xxxxxx are UTF-8 continuation bytes and are not
* counted, so the safe areas are measured in characters. */
for (unsigned int skip = 0; skip < start_safe && ind_start < w.size(); ind_start++)
if ((w[ind_start] & 0xC0) != 0x80)
skip++;
for (unsigned int skip = 0; skip < end_safe && ind_end > 0; ind_end--)
if ((w[ind_end] & 0xC0) != 0x80)
skip++;
/* Index i in w corresponds to index i - 1 in word because of the
* leading dot.
* NOTE(review): output_rules has word.size() entries while ind_end is
* derived from w.size(); this stays in range only if w is exactly two
* bytes longer than word and end_safe is at least 1 -- confirm that
* lower-casing never changes the byte length. */
for (unsigned int i = ind_start; i <= ind_end; i++)
(*output_rules)[i - 1] = rules[i];
return output_rules;
}
/* Reads whitespace-separated tokens from the stream until it is exhausted.
 * The first two all-digit tokens set start_safe and end_safe; every other
 * non-empty token is turned into a HyphenationRule and inserted. */
void HyphenationTree::loadPatterns(istream &i) {
string pattern;
/* The input is a file with whitespace-separated words.
* The first numerical-only word we encountered denotes the safe start,
* the second the safe end area. */
char ch;
/* numeric stays true while the current token consists of digits only. */
bool numeric = true;
/* Number of safe-area fields consumed so far (0, 1 or 2). */
int num_field = 0;
while (i.get(ch)) {
if (ch == '\n' || ch == '\r' || ch == '\t' || ch == ' ') {
/* The output operation: a token just ended. */
if (pattern.size() && numeric && num_field <= 1) {
/* NOTE(review): atoi() comes from <stdlib.h>, which this file does
* not include directly. */
((num_field == 0) ? start_safe : end_safe) = atoi(pattern.c_str());
num_field++;
} else if (pattern.size()) {
insert(
- auto_ptr<HyphenationRule>(new HyphenationRule(pattern)));
+ unique_ptr<HyphenationRule>(new HyphenationRule(pattern)));
}
/* Reinitialize state. */
pattern.clear();
numeric = true;
} else {
/* This branch catches all other (mostly alpha, but possibly UTF-8)
* bytes. The byte is appended to the current token unchanged. */
pattern += ch;
if (ch < '0' || ch > '9') numeric = false;
}
}
/* A last token that is not followed by whitespace is always inserted as
* a pattern, even if it consists of digits only. */
if (pattern.size())
- insert(auto_ptr<HyphenationRule>(new HyphenationRule(pattern)));
+ insert(unique_ptr<HyphenationRule>(new HyphenationRule(pattern)));
}
diff --git a/src/HyphenationTree.h b/src/HyphenationTree.h
index e387a93..21d360c 100644
--- a/src/HyphenationTree.h
+++ b/src/HyphenationTree.h
@@ -1,73 +1,73 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATION_TREE_H
#define HYPHENATION_TREE_H
#include <string>
#include <memory>
#include <vector>
#include <map>
#include "HyphenationRule.h"
namespace Hyphenate {
class HyphenationNode;
/**
* \class HyphenationTree
* \brief The root for a tree of HyphenationNodes.
*/
class HyphenationTree {
private:
/* Root of the node tree; created in the constructor and deleted in the
* destructor.
* NOTE(review): no copy constructor or assignment operator is declared
* here, so a copied tree would delete the same root twice -- confirm
* that trees are never copied. */
HyphenationNode* root;
/* Number of characters at the start / end of a word in which no
* hyphenation is reported. */
unsigned int start_safe, end_safe;
public:
/** The constructor constructs an empty tree, which can be filled
* either by reading a whole file of patterns with
* <code>loadPatterns</code> or by <code>insert</code>. */
HyphenationTree();
~HyphenationTree();
/** Read the istream while it is not empty, cutting it into words
* and constructing patterns from it. The first lone number
* encountered will be the safe start, the second the safe end. */
void loadPatterns(std::istream &source);
/** Insert a particular hyphenation pattern into the hyphenation tree.
* \param pattern The rule to insert. It is filed under the lower-cased
* form of its key, and the tree takes ownership of it.
*/
- void insert(std::auto_ptr<HyphenationRule> pattern);
+ void insert(std::unique_ptr<HyphenationRule> pattern);
/** Apply all patterns for that hyphenation tree to the supplied
* string. Return an array with Hyphenation rules that should be
* applied before the addition of the next letter of the string.
* The pointers in that vector point into this tree. */
- std::auto_ptr<std::vector<const HyphenationRule*> > applyPatterns
+ std::unique_ptr<std::vector<const HyphenationRule*> > applyPatterns
(const std::string &word) const;
/** Like applyPatterns, but will only hyphenate up to the letter
* end_at. */
- std::auto_ptr<std::vector<const HyphenationRule*> > applyPatterns
+ std::unique_ptr<std::vector<const HyphenationRule*> > applyPatterns
(const std::string &word, size_t end_at) const;
};
}
#endif
diff --git a/src/Hyphenator.cpp b/src/Hyphenator.cpp
index b5e5da4..c0808b5 100644
--- a/src/Hyphenator.cpp
+++ b/src/Hyphenator.cpp
@@ -1,262 +1,262 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#include "Hyphenator.h"
#include <iostream>
#include <fstream>
#include <vector>
#include <map>
#include <memory>
#include <ctype.h>
#include <stdlib.h>
#include "HyphenationRule.h"
#include "HyphenationTree.h"
#include "UTF8Functions.h"
#define UTF8_MAX 6
using namespace std;
using namespace Hyphenate;
/** The hyphenation table parser. */
/* Opens the named file for reading, loads its patterns into a newly
 * allocated HyphenationTree and returns that tree to the caller.
 * NOTE(review): the stream is not checked after opening; if the file
 * cannot be opened, loadPatterns() reads nothing and the tree stays
 * empty. */
-static auto_ptr<HyphenationTree> read_hyphenation_table(const char *filename) {
+static unique_ptr<HyphenationTree> read_hyphenation_table(const char *filename) {
ifstream i(filename, fstream::in);
- auto_ptr<HyphenationTree> output(new HyphenationTree());
+ unique_ptr<HyphenationTree> output(new HyphenationTree());
output->loadPatterns(i);
return output;
}
/** Build a hyphenator from the patterns in the file provided. */
Hyphenate::Hyphenator::Hyphenator(const char *filename)
{
    /* Parse the pattern file into a fresh tree and take ownership of it. */
    dictionary.reset(read_hyphenation_table(filename).release());
}
/* Nothing to do explicitly: the dictionary smart pointer releases the tree.
 * The destructor is defined in this file, where HyphenationTree.h is
 * included; Hyphenator.h only forward-declares HyphenationTree. */
Hyphenator::~Hyphenator() {}
std::string Hyphenator::hyphenate
(const std::string &word, const std::string &hyphen)
{
string result;
unsigned int word_start = -1;
/* Go through the input. All non-alpha characters are added to the
* output immediately, and words are hyphenated and then added. */
for (unsigned int i = 0; i < word.size(); i++) {
/* Skip UTF-8 tail bytes. */
if ((word[i] & 0xC0) == 0x80)
;
else {
bool isalpha = utf32IsAlpha(utf8GetCharacter(word.c_str() + i));
if (word_start == string::npos && isalpha)
word_start = i;
else if (word_start != string::npos && !isalpha) {
result +=
hyphenate_word(word.substr(word_start, i - word_start), hyphen);
word_start = string::npos;
}
}
if (word_start == string::npos)
result += word[i];
}
if (word_start != string::npos)
result += hyphenate_word(word.substr(word_start), hyphen);
return result;
}
/* Hyphenates a single word: looks up the rule for every byte position and
 * builds the result by applying each rule in front of the byte it belongs
 * to. */
std::string Hyphenator::hyphenate_word
(const std::string &word, const std::string &hyphen)
{
/* rules has one entry per byte of word; NULL means no hyphenation. */
- auto_ptr<vector<const HyphenationRule*> > rules =
+ unique_ptr<vector<const HyphenationRule*> > rules =
dictionary->applyPatterns(word);
/* Build our result string. Of course, we _could_ insert characters in
* w, but that would be highly inefficient. */
string result;
/* Number of upcoming input bytes that still have to be dropped because a
* rule's apply() asked for them to be skipped. */
int acc_skip = 0;
for (unsigned int i = 0; i < word.size(); i++) {
if ((*rules)[i] != NULL)
acc_skip += (*rules)[i]->apply(result, hyphen);
if (acc_skip > 0)
acc_skip--;
else
result += word[i];
}
return result;
}
/* Splits src into a first part (result.first) and the remainder
 * (result.second). The split is taken at a breakable space next to
 * character position len if there is one; otherwise at a hyphenation
 * point of a word in front of that position; otherwise at the nearest
 * space further left; and if nothing else works, after the first
 * space-delimited block. hyphen is the string placed at a hyphenation
 * point. */
pair<std::string, std::string> Hyphenator::hyphenate_at
(const std::string &src, const std::string &hyphen, size_t len)
{
/* First of all, find the word which needs to be hyphenated. */
const char *cur = src.c_str();
/* NOTE(review): this loop advances len characters without any visible
* check for the end of src, and i is an unsigned int while len is a
* size_t (default npos) -- confirm how utf8GoToNextCharacter behaves at
* the terminating NUL. */
for (unsigned int i = 0; i < len; i++)
cur = utf8GoToNextCharacter(cur);
/* next is the character after cur, unless cur itself is a space. */
const char *next = cur;
if (!utf32IsBreakableSpace(utf8GetCharacter(next)))
next = utf8GoToNextCharacter(next);
pair<string, string> result;
if (utf32IsBreakableSpace(utf8GetCharacter(next))) {
/* We are lucky: There is a space we can hyphenate at. */
/* We leave no spaces at the end of a line: */
while (utf32IsBreakableSpace(utf8GetCharacter(cur)))
cur = utf8GoToPrevCharacter(cur);
/* This local len shadows the parameter of the same name. */
int len = cur - src.c_str() + 1;
result.first = src.substr(0, len);
/* Neither do we leave spaces at the beginning of the next. */
while (utf32IsBreakableSpace(utf8GetCharacter(next)))
next = utf8GoToNextCharacter(next);
result.second = src.substr(next - src.c_str());
} else {
/* We can hyphenate at hyphenation points in words or at spaces, whatever
* comes earlier. We will check all words here in the loop. */
/* border marks the position the first part must not grow beyond. */
const char *border = cur;
while (true) {
/* Find the start of a word first. */
bool in_word = utf32IsAlpha(utf8GetCharacter(cur));
const char *word_start = NULL;
/* Walk backwards until a word start or a breakable space is found, or
* the beginning of src is reached. */
while (cur > src.c_str()) {
cur = utf8GoToPrevCharacter(cur);
int ch = utf8GetCharacter(cur);
if (in_word && (!utf32IsAlpha(ch))) {
/* If we have a word, try hyphenating it.*/
word_start = utf8GoToNextCharacter(cur);
break;
} else if (utf32IsBreakableSpace(ch)) {
break;
} else if (!in_word && utf32IsAlpha(ch))
in_word = true;
if (cur == src.c_str() && in_word)
word_start = cur;
}
/* There are two reasons why we may have left the previous loop with-
* out result:
* Either because our word goes all the way to the first character,
* or because we found whitespace. */
/* In the first case, there is nothing really hyphenateable. */
if (word_start != NULL) {
/* We have the start of a word, now look for the character after
* the end. */
const char *word_end = word_start;
while (utf32IsAlpha(utf8GetCharacter(word_end)))
word_end = utf8GoToNextCharacter(word_end);
/* Build the substring consisting of the word. */
string word;
for (const char *i = word_start; i < word_end; i++)
word += *i;
/* Hyphenate the word. */
- auto_ptr<vector<const HyphenationRule*> > rules =
+ unique_ptr<vector<const HyphenationRule*> > rules =
dictionary->applyPatterns(word);
/* Determine the index of the latest hyphenation that will still
* fit. */
int latest_possible_hyphenation = -1;
int earliest_hyphenation = -1;
for (int i = 0; i < (int)rules->size(); i++)
if ((*rules)[i] != NULL) {
if (earliest_hyphenation == -1)
earliest_hyphenation = i;
/* The part in front of the hyphen, plus the extra bytes the rule
* needs, plus the hyphen itself, must end at or before border. */
if (word_start + i +
(*rules)[i]->spaceNeededPreHyphen() + hyphen.length()
<= border)
{
if (i > latest_possible_hyphenation) {
latest_possible_hyphenation = i;
}
} else
break;
}
/* Is there a breakable space somewhere up to the start of the word? */
bool have_space = false;
for (const char *i = src.c_str(); i <= word_start;
i = utf8GoToNextCharacter(i))
if (utf32IsBreakableSpace(utf8GetCharacter(i))) {
have_space = true;
break;
}
/* With no fitting hyphenation and no space to fall back on, take the
* earliest hyphenation point even though it exceeds the border. */
if (latest_possible_hyphenation == -1 && !have_space)
latest_possible_hyphenation = earliest_hyphenation;
/* Apply the best hyphenation, if any. */
if (latest_possible_hyphenation >= 0) {
int i = latest_possible_hyphenation;
result.first = src.substr(0, word_start - src.c_str() + i);
(*rules)[i]->apply_first(result.first, hyphen);
int skip = (*rules)[i]->apply_second(result.second);
/* The remainder starts after the bytes the rule asked to skip. */
const char *after_hyphen = word_start + i + skip;
result.second += string(after_hyphen);
break;
}
}
if (cur == src.c_str()) {
/* We cannot hyphenate at all, so leave the first block standing
* and move to its end. */
const char *eol = cur;
while (*eol != 0 && !utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToNextCharacter(eol);
result.first = src.substr(0, eol - src.c_str() + 1);
while (*eol != 0 && utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToNextCharacter(eol);
result.second = string(eol);
break;
} else if (utf32IsBreakableSpace(utf8GetCharacter(cur))) {
/* eol is the end of the previous line, bol the start of the
* next. */
const char *eol = cur, *bol = cur;
while (utf32IsBreakableSpace(utf8GetCharacter(eol)))
eol = utf8GoToPrevCharacter(eol);
while (utf32IsBreakableSpace(utf8GetCharacter(bol)))
bol = utf8GoToNextCharacter(bol);
result.first = src.substr(0, eol - src.c_str() + 1);
result.second = string(bol);
break;
}
/* Otherwise cur sits in front of a word that could not be hyphenated;
* the next round continues the backward search from there. */
}
}
return result;
}
/* Returns the rule vector for word exactly as produced by the dictionary's
 * applyPatterns(); nothing is hyphenated here. */
-std::auto_ptr<std::vector<const HyphenationRule*> >
+std::unique_ptr<std::vector<const HyphenationRule*> >
Hyphenate::Hyphenator::applyHyphenationRules(const std::string& word)
{
return dictionary->applyPatterns(word);
}
diff --git a/src/Hyphenator.h b/src/Hyphenator.h
index 2157d0b..bfff898 100644
--- a/src/Hyphenator.h
+++ b/src/Hyphenator.h
@@ -1,103 +1,103 @@
/* libhyphenate: A TeX-like hyphenation algorithm.
* Copyright (C) 2007 Steve Wolter
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* If you have any questions, feel free to contact me:
* http://swolter.sdf1.org
**/
#ifndef HYPHENATE_HYPHENATOR_H
#define HYPHENATE_HYPHENATOR_H
#include <map>
#include <string>
#include <memory>
#include <vector>
namespace Hyphenate {
class HyphenationTree;
class HyphenationRule;
class Hyphenator {
private:
/* The pattern tree built from the file passed to the constructor. */
- std::auto_ptr<HyphenationTree> dictionary;
+ std::unique_ptr<HyphenationTree> dictionary;
/* Hyphenates one word that consists of letters only; used by
* hyphenate() for every word it finds. */
std::string hyphenate_word
(const std::string &word, const std::string &hyphen);
public:
/** Build a hyphenator from the patterns in the file provided. */
Hyphenator(const char *filename);
/** Destructor. */
~Hyphenator();
/** The actual workhorse. You'll want to call this function once
* for each word (NEW: or complete string, not only word. The library
* will do the word-splitting for you) you want hyphenated.
*
* Usage example (the constructor declared here takes the name of a
* pattern file):
* Hyphenate::Hyphenator hyphenator(german_pattern_file);
* hyphenator.hyphenate("Schiffahrt");
*
* yields "Schiff-fahrt", while
*
* Hyphenate::Hyphenator hyphenator(english_pattern_file);
* hyphenator.hyphenate("example", "&shy;");
*
* yields "ex&shy;am&shy;ple".
*
* \param word A UTF-8 encoded word or text to be hyphenated.
* \param hyphen The string to put at each possible
* hyphenation point. The default is an ASCII dash.
*/
std::string hyphenate
(const std::string &word,
const std::string &hyphen = "-");
/** Find a single hyphenation point in the string so that the first
* part (including a hyphen) will be shorter or equal in length
* to the parameter len. If this is not possible, choose the shortest
* possible string.
*
* The first element is the result, the second element the rest of
* the string.
*
* Example: To format a piece of text to width 60, use the following
* loop:
* string rest = text;
* string result = "";
* while ( ! rest.empty() ) {
* pair<string,string> p = your_hyphenator.hyphenate_at(rest, "-", 60);
* result += p.first + "\n";
* rest = p.second;
* }
**/
std::pair<std::string, std::string> hyphenate_at
(const std::string &word,
const std::string &hyphen = "-",
size_t len = std::string::npos);
/** Just apply the hyphenation patterns to the word, but don't
* hyphenate anything.
*
* \returns A vector with the same size as the word with a non-NULL
* entry for every hyphenation point. The entries point into
* this hyphenator's pattern tree. */
- std::auto_ptr<std::vector<const HyphenationRule*> >
+ std::unique_ptr<std::vector<const HyphenationRule*> >
applyHyphenationRules(const std::string& word);
};
}
#endif

File Metadata

Mime Type
text/x-diff
Expires
Fri, May 8, 8:26 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
59840
Default Alt Text
(32 KB)

Event Timeline