WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit e3a6026

Browse files
fix(core): Address UTF-8 character boundary issues in LineBreaker (#3228)
Co-authored-by: Tushar Mathur <[email protected]>
1 parent 4cf48a1 commit e3a6026

File tree

3 files changed

+64
-10
lines changed

3 files changed

+64
-10
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,7 @@ tailcall-valid = { workspace = true }
175175
dashmap = "6.1.0"
176176
urlencoding = "2.1.3"
177177
tailcall-chunk = "0.3.0"
178+
unicode-segmentation = "1.12.0"
178179

179180
# to build rquickjs bindings on systems without builtin bindings
180181
[target.'cfg(all(target_os = "windows", target_arch = "x86"))'.dependencies]

src/core/document.rs

Lines changed: 62 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ use std::fmt::Display;
44
use async_graphql::parser::types::*;
55
use async_graphql::Positioned;
66
use async_graphql_value::ConstValue;
7+
use unicode_segmentation::UnicodeSegmentation;
78

89
use super::jit::Directive as JitDirective;
910
use super::json::JsonLikeOwned;
@@ -28,19 +29,35 @@ impl<'a> Iterator for LineBreaker<'a> {
2829
return None;
2930
}
3031

31-
let end_index = self
32-
.string
33-
.chars()
34-
.skip(self.index + self.break_at)
35-
.enumerate()
36-
.find(|(_, ch)| ch.is_whitespace())
37-
.map(|(index, _)| self.index + self.break_at + index + 1)
38-
.unwrap_or(self.string.len());
32+
let graphemes = self.string[self.index..].graphemes(true).peekable();
33+
let mut iter = graphemes;
34+
let mut current_len = 0;
35+
let mut last_valid_index = self.index;
36+
37+
while let Some(grapheme) = iter.peek() {
38+
let grapheme_len = grapheme.len();
39+
40+
if current_len + grapheme_len > self.break_at {
41+
break;
42+
}
43+
44+
iter.next();
45+
current_len += grapheme_len;
46+
last_valid_index += grapheme_len;
47+
}
48+
49+
for grapheme in iter {
50+
if grapheme.chars().any(|ch| ch.is_whitespace()) {
51+
last_valid_index += grapheme.len();
52+
break;
53+
}
54+
last_valid_index += grapheme.len();
55+
}
3956

4057
let start_index = self.index;
41-
self.index = end_index;
58+
self.index = last_valid_index;
4259

43-
Some(&self.string[start_index..end_index])
60+
Some(&self.string[start_index..self.index])
4461
}
4562
}
4663

@@ -456,3 +473,38 @@ impl<'a, Input: JsonLikeOwned + Display> From<&'a JitDirective<Input>> for Direc
456473
}
457474
}
458475
}
476+
477+
#[cfg(test)]
478+
mod tests {
479+
use super::get_formatted_docs;
480+
481+
#[test]
482+
fn test_get_formatted_docs() {
483+
let input = Some(String::from(
484+
"This is a test string for get_formatted_docs function. You are typing a long sentence for testing. What a nice, long sentence!",
485+
));
486+
let indent = 4;
487+
488+
let result = get_formatted_docs(input, indent);
489+
let expected = String::from(
490+
" \"\"\"\n This is a test string for get_formatted_docs function. You are typing a long sentence \n for testing. What a nice, long sentence!\n \"\"\"\n",
491+
);
492+
493+
assert_eq!(result, expected)
494+
}
495+
496+
#[test]
497+
fn test_get_formatted_docs_utf8() {
498+
let input = Some(String::from(
499+
"get_formatted_docs 함수 테스트를 위한 문장입니다. 테스트를 위해 긴 문장을 입력하는 중 입니다. テストのために長い文章を入力しているところです。なんて素敵な長文です!",
500+
));
501+
let indent = 4;
502+
503+
let result = get_formatted_docs(input, indent);
504+
let expected = String::from(
505+
" \"\"\"\n get_formatted_docs 함수 테스트를 위한 문장입니다. 테스트를 위해 \n 긴 문장을 입력하는 중 입니다. テストのために長い文章を入力しているところです。なんて素敵な長文です!\n \"\"\"\n",
506+
);
507+
508+
assert_eq!(result, expected)
509+
}
510+
}

0 commit comments

Comments
 (0)