Re: Transcribing non-ascii URLs [was: revised "generic syntax" internet draft]

Gary Adams - Sun Microsystems Labs BOS <Gary.Adams@east.sun.com> Wed, 16 April 1997 21:21 UTC

Received: from cnri by ietf.org id aa21088; 16 Apr 97 17:21 EDT
Received: from services.Bunyip.Com by CNRI.Reston.VA.US id aa22660; 16 Apr 97 17:21 EDT
Received: (from daemon@localhost) by services.bunyip.com (8.8.5/8.8.5) id RAA18726 for uri-out; Wed, 16 Apr 1997 17:00:02 -0400 (EDT)
Received: from mercury.Sun.COM (mercury.Sun.COM [192.9.25.1]) by services.bunyip.com (8.8.5/8.8.5) with SMTP id QAA18713 for <uri@services.bunyip.com>; Wed, 16 Apr 1997 16:59:57 -0400 (EDT)
Received: from East.Sun.COM ([129.148.1.241]) by mercury.Sun.COM (SMI-8.6/mail.byaddr) with SMTP id OAA24208; Wed, 16 Apr 1997 14:07:07 -0700
Received: from suneast.East.Sun.COM by East.Sun.COM (SMI-8.6/SMI-5.3) id QAA28833; Wed, 16 Apr 1997 16:59:16 -0400
Received: from zeppo.East.Sun.COM by suneast.East.Sun.COM (SMI-8.6/SMI-SVR4) id QAA15068; Wed, 16 Apr 1997 16:59:17 -0400
Received: by zeppo.East.Sun.COM (SMI-8.6/SMI-SVR4) id QAA04656; Wed, 16 Apr 1997 16:53:42 -0400
Date: Wed, 16 Apr 1997 16:53:42 -0400
From: Gary Adams - Sun Microsystems Labs BOS <Gary.Adams@east.sun.com>
Message-Id: <199704162053.QAA04656@zeppo.East.Sun.COM>
To: bert@w3.org, uri@services.bunyip.com
Subject: Re: Transcribing non-ascii URLs [was: revised "generic syntax" internet draft]
Sender: owner-uri@bunyip.com
Precedence: bulk

On a similar note, here's how a Unicode client might take an input URL
and transcode the bits before sending to a server which only accepts
a platform specific encoding. This was hacked together today on JDK 1.1.1
and may still be buggy (i.e., only tested on Solaris 2.5 with LANG=en_US).

 - save in file UTF8URL.java
 - compile as  "javac UTF8URL.java"
 - run as "java UTF8URL"

-----------------------------------------------------------------------
import java.awt.* ;
import java.awt.event.* ;
import java.net.URLEncoder;
import sun.io.CharToByteConverter;

/**
 * Small AWT demo: the user types text into a text area and picks a target
 * character encoding from a drop-down list.  On every text change the input
 * is transcoded from Unicode to the selected encoding, the resulting octets
 * are URL-escaped, and a tab-separated line
 * (encoding, input text, escaped text) is printed on standard output.
 */
public class UTF8URL extends Frame
                     implements TextListener, ItemListener {

   /** Name of the currently selected target character encoding. */
   public String encoding;

   /** URL-escaped form of the most recently transcoded input text. */
   public String encoded_text;

   /** Digits used to write the two hex characters of a %XX escape. */
   private static final String HEX_DIGITS = "0123456789ABCDEF";

   /** Encoding names offered in the drop-down list. */
   private String [] charset_list = {
	"8859_1", "8859_2", "8859_3", "8859_5",
	"8859_6", "8859_7", "8859_9", "Cp1250",
	"Cp1251", "Cp1252", "Cp1253", "Cp1254",
	"Cp1255", "Cp1256", "Cp1257", "Cp1258",
	"Cp437", "Cp737", "Cp775", "Cp850",
	"Cp852", "Cp855", "Cp857", "Cp860",
	"Cp861", "Cp862", "Cp863", "Cp864",
	"Cp865", "Cp866", "Cp869", "Cp874",
	"EUCJIS", "JIS", "MacArabic", "MacCentralEurope",
	"Macintosh", "MacCroatian", "MacCyrillic", "MacDingbat",
	"MacGreek", "MacHebrew", "MacIceland", "MacRoman",
	"MacRomania", "MacSymbol", "MacThai", "MacTurkish",
	"MacUkraine", "SJIS", "UTF8" 
	};

   /**
    * Builds the window (a text area plus the encoding chooser), registers
    * this object as listener on both, selects the first encoding of the
    * list as the initial target and shows the frame.
    */
   public UTF8URL () {
      super("Character set demo");
      Panel p = new Panel ();
      Choice c = new Choice();
      TextArea t = new TextArea("", 3, 45, 
				TextArea.SCROLLBARS_VERTICAL_ONLY);

      for (int i = 0 ; i < charset_list.length; i++){
	c.add(charset_list[i]);
      }

      c.addItemListener(this);
      encoding = charset_list[0];
      t.addTextListener(this);

      p.add(t);
      p.add(c);

      add(p);
      pack();
      setVisible(true);
   }

   /**
    * Records the newly chosen encoding name.  The name is looked up again
    * on every text change, so the selection takes effect on the next
    * text event.
    *
    * @param ie event whose source is the encoding Choice
    */
   public void itemStateChanged(ItemEvent ie){
      encoding = ((Choice)(ie.getSource())).getSelectedItem();
   }

   /**
    * Transcodes the current content of the text area to the selected
    * encoding, URL-escapes it, stores the result in encoded_text and
    * prints "encoding TAB input TAB escaped" on standard output.  An
    * encoding name the runtime does not support is reported on standard
    * error and leaves encoded_text unchanged.
    *
    * @param te event whose source is the input TextArea
    */
   public void textValueChanged(TextEvent te){
      String input_text =  ((TextArea)(te.getSource())).getText();

      try {
         encoded_text = transcodeAndEscape(input_text, encoding);

         System.out.println( encoding + "\t" 
			   + input_text + "\t" 
			   + encoded_text);
      } catch (java.io.UnsupportedEncodingException e) {
         System.err.println(e.toString());
      }
   }

   /**
    * Converts text to the octets of the named encoding and URL-escapes
    * those octets.  ASCII letters, digits and the characters - _ . * are
    * copied unchanged, a space octet becomes '+', and every other octet
    * becomes %XX with upper-case hex digits.
    *
    * The escaping is applied to the raw octets rather than to a String
    * rebuilt from them, so the result does not depend on the platform
    * default character set, and the octet array is sized by the encoder
    * itself, so multi-byte encodings cannot overflow it.
    *
    * @param text        Unicode text to transcode
    * @param charsetName name of the target encoding
    * @return the escaped text, consisting of ASCII characters only
    * @throws java.io.UnsupportedEncodingException
    *            if the runtime does not support charsetName
    */
   static String transcodeAndEscape(String text, String charsetName)
         throws java.io.UnsupportedEncodingException {
      byte [] octets = text.getBytes(charsetName);
      StringBuffer escaped = new StringBuffer(octets.length * 3);

      for (int i = 0 ; i < octets.length; i++){
         int octet = octets[i] & 0xFF;   // treat the byte as unsigned

         if (isUnescaped(octet)) {
            escaped.append((char) octet);
         } else if (octet == ' ') {
            escaped.append('+');
         } else {
            escaped.append('%');
            escaped.append(HEX_DIGITS.charAt(octet >> 4));
            escaped.append(HEX_DIGITS.charAt(octet & 0x0F));
         }
      }
      return escaped.toString();
   }

   /**
    * Tells whether an octet value may appear in the output as itself:
    * true for ASCII letters, ASCII digits and - _ . *
    */
   private static boolean isUnescaped(int octet) {
      return (octet >= 'a' && octet <= 'z')
          || (octet >= 'A' && octet <= 'Z')
          || (octet >= '0' && octet <= '9')
          || octet == '-' || octet == '_'
          || octet == '.' || octet == '*';
   }

   /** Starts the demo by opening one UTF8URL window. */
   public static void main (String[] args) {
       UTF8URL udemo = new UTF8URL();
   }
}
\
/gra