Skip navigation.
 
mlRe: Unicode canonical decomposed form and text encoding
FROM : Aki Inoue
DATE : Tue Jan 14 20:39:38 2003

Renaud,

I cooked up a simple example of using TEC to canonically decompose.
#import <Foundation/Foundation.h>

// Input text for the example: a single precomposed UTF-16 code unit,
// U+00C0 LATIN CAPITAL LETTER A WITH GRAVE.
static UniChar characters[] = {0x00C0};

#define MAX_BUFFER_LENGTH (100)

/*
 * Converts the UTF-16 text in `characters` from the default Unicode variant
 * to the canonical-decomposition variant with the Unicode Converter
 * (CreateUnicodeToTextInfo / ConvertFromUnicodeToText) and logs the result.
 *
 * Returns 0 on success, 1 if the converter could not be created or the
 * conversion failed.
 */
int main (int argc, const char * argv[]) {
    NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
    UnicodeToTextInfo textInfo;
    // Source and target are both 16-bit Unicode; only the variant differs:
    // default variant in, canonical-decomposition variant out.
    UnicodeMapping mapping = {
        CreateTextEncoding(kTextEncodingUnicodeDefault,
                           kTextEncodingDefaultVariant,
                           kUnicode16BitFormat),
        CreateTextEncoding(kTextEncodingUnicodeDefault,
                           kUnicodeCanonicalDecompVariant,
                           kUnicode16BitFormat),
        kUnicodeUseLatestMapping
    };
    UniChar buffer[MAX_BUFFER_LENGTH];
    ByteCount inputRead = 0, outputLen = 0;
    OSStatus status;
    int exitCode = 0;

    status = CreateUnicodeToTextInfo(&mapping, &textInfo);
    if (noErr != status) {
        NSLog(@"Failed to create UnicodeToTextInfo");
        // Nothing to dispose yet; just drain the pool before leaving.
        [pool release];
        return 1;
    }

    // Input and output lengths are passed in bytes, not in UniChar units.
    // NOTE(review): kTECKeepInfoFixMask appears to be declared among the
    // TECSetBasicOptions flags; the Unicode Converter's own keep-info control
    // flag is kUnicodeKeepInfoMask -- confirm which one is intended here.
    status = ConvertFromUnicodeToText(textInfo,
                                      sizeof(characters),
                                      characters,
                                      kTECKeepInfoFixMask,
                                      0, NULL, NULL, NULL,
                                      MAX_BUFFER_LENGTH * sizeof(UniChar),
                                      &inputRead,
                                      &outputLen,
                                      buffer);
    if (noErr != status) {
        NSLog(@"Failed to convert string");
        exitCode = 1;
    } else {
        // outputLen is the number of bytes written to buffer; convert it to
        // a UniChar count before building the string.
        NSLog(@"Decomposed string: %@",
              [NSString stringWithCharacters:buffer
                                      length:outputLen / sizeof(UniChar)]);
    }

    // Dispose the converter on the success and the failure path alike.
    DisposeUnicodeToTextInfo(&textInfo);

    [pool release];
    return exitCode;
}

I tested this on Jaguar, but it is supposed to work on earlier versions as well.
Pre-Jaguar TEC doesn't have precomposition capability.
Note I'm passing kTECKeepInfoFixMask to ConvertFromUnicodeToText()
above.

This bit keeps the last conversion info so that TEC ensures the
repeated conversion doesn't cause surprises with decomposed/surrogate
characters in the source buffer.

Hope this helps,

Aki

On 2003.1.13, at 07:54  PM, Renaud Boisjoly wrote:

> Hi all!
>
> I'm trying to convert Unicode strings from the precomposed form to the
> decomposed form (or the other way around) under 10.1
>
> Under 10.2 I can use NSString's decomposedStringWithCanonicalMapping
> method, which works fine.
>
> But this is not supported under 10.1, which breaks my app.
>
> Has anyone ever done this using Text Encoding Converter or Unicode
> Converter? Or perhaps by adapting GPL routines like GNOME's
> libunicode? (http://cvs.gnome.org/lxr/source/libunicode/decomp.h and
> http://cvs.gnome.org/lxr/source/libunicode/decomp.c)
>
> I've never adapted regular C routines like these to Objective-C and my
> experience is quite limited in that area. Same goes with Carbon
> routines like TEC.
>
> If anyone is willing to share their experience with this type of
> stuff, it would really help me out.
>
> Here's something I got from this list which does encoding conversions,
> but I'm not sure how I'm supposed to call this function from my Cocoa
> code... I used kTextEncodingMacUnicode instead of the one in the
> original code, but I'm not sure if that is the right choice either...
>
> + (TECObjectRef) _unicodeID3v2ToMacTextConverter
> {
>    static TECObjectRef id3v2ToMacTextConverter = NULL;
>    if (id3v2ToMacTextConverter == NULL) {
>        OSStatus status;
>        status = TECCreateConverter(&id3v2ToMacTextConverter,
>                                    kTextEncodingMacUnicode,
> kTextEncodingUnicode);
>        if (status != 0) {
>            NSLog(@"TECCreateConverter() error %d", status);
>            return nil;
>        }
>    }
>    return id3v2ToMacTextConverter;
> }
>
> + (CFStringRef /* implies caller retain */) _convertUnicodeText:
>                              (ConstTextPtr) textBuffer ofLength:
> (unsigned int) length
> {
>    ByteCount actualInputConsumed;
>    ByteCount actualOutputProduced;
>    UInt8 outputBuffer[1024];
>    CFMutableStringRef returnString = CFStringCreateMutable(NULL, 0);
>    TECObjectRef textConverter = [self
> _unicodeID3v2ToMacTextConverter];
>    do {
>        OSStatus status;
>        status = TECConvertText(textConverter, textBuffer, length,
>                                &actualInputConsumed, outputBuffer,
> sizeof(outputBuffer),
>                                &actualOutputProduced);
>        if (status != 0) {
>            NSLog(@"TECConvertText() error %d", status);
>            return nil;
>        }
>        CFStringAppendCharacters(returnString, (const UniChar
> *)outputBuffer, actualOutputProduced);
>        length = length - actualInputConsumed;
>        textBuffer = textBuffer + actualInputConsumed;
>    } while (length > 0);
>    return returnString;
> }
> _______________________________________________
> cocoa-dev mailing list | <email_removed>
> Help/Unsubscribe/Archives:
> http://www.lists.apple.com/mailman/listinfo/cocoa-dev
> Do not post admin requests to the list. They will be ignored.

_______________________________________________
cocoa-dev mailing list | <email_removed>
Help/Unsubscribe/Archives: http://www.lists.apple.com/mailman/listinfo/cocoa-dev
Do not post admin requests to the list. They will be ignored.

Related mailsAuthorDate
mlUnicode canonical decomposed form and text encoding Renaud Boisjoly Jan 14, 04:54
mlRe: Unicode canonical decomposed form and text encoding Aki Inoue Jan 14, 20:39
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 14, 20:45
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 14, 22:08
mlRe: Unicode canonical decomposed form and text encoding Clark S. Cox III Jan 14, 23:05
mlRe: Unicode canonical decomposed form and text encoding Dietrich Epp Jan 14, 23:13
mlRe: Unicode canonical decomposed form and text encoding Aki Inoue Jan 14, 23:44
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 15, 01:11
mlRe: Unicode canonical decomposed form and text encoding Aki Inoue Jan 15, 01:39
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 15, 02:26
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 15, 02:43
mlRe: Unicode canonical decomposed form and text encoding Aki Inoue Jan 15, 02:44
mlRe: Unicode canonical decomposed form and text encoding Renaud Boisjoly Jan 15, 02:57