The Mudcat Café TM
Thread #135626   Message #3100959
Posted By: GUEST,Grishka
23-Feb-11 - 05:22 AM
Thread Name: Tech: Non-ASCII character display problems
Subject: RE: Tech: Non-ASCII character display problems
I obviously failed to convince those in charge so far, and we are left puzzled once more – tant pis.

The tool being finished, I feel I should post it nevertheless, in case some adepts like Artful Codger or Jon Banjo want to study it and either recommend it to Mudcat or use it themselves when asked once more to convert something (saving the trouble of reconstructing the HTML tags manually). Feel free to adapt it to your needs and taste. Note the total absence of potentially harmful code such as file manipulation, internet access, system calls, etc.

Farewell geekdom, hello life!

import java.awt.*;
import java.awt.datatransfer.*;
import java.awt.event.*;
import java.nio.*;
import java.nio.charset.Charset;
import javax.swing.*;
import javax.swing.event.*;
import javax.swing.text.*;
import javax.swing.text.html.*;

/**
 * Swing tool that re-interprets clipboard text under a selectable codepage
 * and writes the result back to the clipboard as pure-ASCII HTML source.
 *
 * <p>Work flow: "Capture" reads the clipboard text and encodes it to bytes
 * with the editor codepage ({@link #mMyEditorCharset}). Those bytes are then
 * decoded with the codepage selected in the list, every non-ASCII character
 * is replaced by a decimal numeric character reference, and the result is
 * both previewed in the HTML pane and placed on the system clipboard.
 *
 * <p>The class performs no file, network or process access; the only
 * external resource it touches is the system clipboard.
 *
 * @author Grishka, donated to Mudcat.org and the public domain.
 */
public class CodepageTurner extends JFrame
{
 // codepage used in the SOURCE OF COPYING; Notepad uses "windows-1252"
 static final String cMyEditorCharsetName = ""; // empty: assume system codepage
 Charset mMyEditorCharset = Charset.defaultCharset (); // unless ...
 Object [] mCharsets;
 Charset mCharSet = null;

 JScrollPane mHtmlScrollPane = new JScrollPane ();
 JTextPane mHtmlPane = new JTextPane ();
 JScrollPane mCodepagesScrollPane = new JScrollPane ();
 JList mCodepageList;
 JLabel mOutput = new JLabel ();
 static final Color mGreenColor = new Color (0, 160, 0);
 static final Color mAmberColor = new Color (150, 130, 0);

 // captured clipboard text, encoded with mMyEditorCharset
 byte [] mClip = new byte [0];
 // last text this tool put on the clipboard (null before the first run)
 String mClipOut = null;

 /**
  * Outcome of {@link #EscapeNonAscii}: the escaped text plus the statistics
  * shown in the status label.
  */
 static final class Conversion
 {
  /** Pure-ASCII text with numeric character references. */
  final String mHtml;
  /** Number of characters (Unicode code points) processed. */
  final int mCntChar;
  /** Number of characters that had to be escaped. */
  final int mCntConv;
  /** Number of escaped characters that are U+FFFD (undecodable input). */
  final int mCntBad;

  Conversion (String aHtml, int aCntChar, int aCntConv, int aCntBad)
  {
   mHtml = aHtml;
   mCntChar = aCntChar;
   mCntConv = aCntConv;
   mCntBad = aCntBad;
  }
 }

 /**
  * Builds the window: status label and capture buttons on top, below them
  * a split pane with the HTML preview (left) and the codepage list (right).
  */
 private CodepageTurner ()
 {
  setTitle ("CodepageTurner for HTML source code");
  setDefaultCloseOperation (javax.swing.WindowConstants.EXIT_ON_CLOSE);
  getContentPane ().setLayout (new GridBagLayout ());

  // label for output
  GridBagConstraints lConstraints = new GridBagConstraints (0, 0, 1, 1,
    1., 0., GridBagConstraints.WEST, GridBagConstraints.BOTH,
    new Insets (0, 5, 0, 5), 0, 0);
  getContentPane ().add (mOutput, lConstraints);

  // buttons to capture the clipboard ====================================
  JButton lCaptureButton = new JButton ("Capture");
  lCaptureButton.addActionListener (new ActionListener () {
   @Override
   public void actionPerformed (ActionEvent evt) {
    Capture (false);
   }
  });
  lConstraints = new GridBagConstraints (1, 0, 1, 1,
    0., 0., GridBagConstraints.CENTER, GridBagConstraints.BOTH,
    new Insets (0, 0, 0, 0), 0, 0);
  getContentPane ().add (lCaptureButton, lConstraints);
  JButton lRevertButton = new JButton ("Revert");
  lRevertButton.setToolTipText ("Capture text, reverting erroneous byte-escaping");
  lRevertButton.addActionListener (new ActionListener () {
   @Override
   public void actionPerformed (ActionEvent evt) {
    Capture (true);
   }
  });
  lConstraints = new GridBagConstraints (2, 0, 1, 1,
    0., 0., GridBagConstraints.CENTER, GridBagConstraints.BOTH,
    new Insets (0, 0, 0, 0), 0, 0);
  getContentPane ().add (lRevertButton, lConstraints);

  // split pane
  JSplitPane lSplitPane = new JSplitPane ();
  lSplitPane.setDividerLocation (450);
  lSplitPane.setOrientation (JSplitPane.HORIZONTAL_SPLIT);
  lConstraints = new GridBagConstraints (0, 1, 3, 1,
    1., 1., GridBagConstraints.CENTER, GridBagConstraints.BOTH,
    new Insets (0, 0, 0, 0), 0, 0);
  getContentPane ().add (lSplitPane, lConstraints);

  // area to display the (approximate) effect in HTML ====================
  mHtmlScrollPane.setPreferredSize (new Dimension (650, 600));
  lSplitPane.setLeftComponent (mHtmlScrollPane);
  mHtmlPane.setContentType ("text/html");
  mHtmlPane.setEditable (false);
  PreventTheHtmlPaneFromFiring ();
  mHtmlScrollPane.setViewportView (mHtmlPane);

  // List of all codepages ("charsets") ==================================
  mCodepagesScrollPane.setPreferredSize (new Dimension (350, 600));
  lSplitPane.setRightComponent (mCodepagesScrollPane);
  mCharsets = Charset.availableCharsets ().values ().toArray ();
  mCodepageList = new JList (mCharsets);
  mCodepageList.setLayoutOrientation (JList.VERTICAL_WRAP);
  mCodepageList.addListSelectionListener (new ListSelectionListener () {
   @Override
   public void valueChanged (ListSelectionEvent aEvent)
   {
    // getSelectedValue () is null when the selection has been cleared;
    // indexing the array with getSelectedIndex () would throw then.
    Object lSelected = mCodepageList.getSelectedValue ();
    if (!(lSelected instanceof Charset))
     return;
    Charset lCharSet = (Charset) lSelected;
    if (!lCharSet.equals (mCharSet))
    {
     mCharSet = lCharSet;
     // Nothing captured yet: do not run Process, because it would
     // replace the user's clipboard contents with an empty string.
     if (mClip.length > 0)
      Process (mHtmlPane.viewToModel (mHtmlPane.getVisibleRect ().getLocation ()));
    }
   }
  });
  mCodepageList.addComponentListener (new ComponentAdapter () {
   @Override
   public void componentResized (ComponentEvent aEvt)
   {
    LayoutTheList ();
   }
  });
  LayoutTheList ();
  pack ();
  if (cMyEditorCharsetName.length () > 0)
  {
   if (Charset.isSupported (cMyEditorCharsetName))
    mMyEditorCharset = Charset.forName (cMyEditorCharsetName);
   else
    Alarm ("Codepage " + cMyEditorCharsetName
    + " not found; we assume " + mMyEditorCharset.name () + " instead.");
  }
  mCharSet = mMyEditorCharset;
  // Show which codepage is in effect. mCharSet is already set, so the
  // selection listener sees no change and does not call Process.
  mCodepageList.setSelectedValue (mCharSet, true);
 }

 /**
  * Installs an editor kit and a hyperlink listener that merely display what
  * a form submission or an activated link would have sent, in a message
  * dialog, instead of acting on it.
  */
 private void PreventTheHtmlPaneFromFiring ()
 {
  mHtmlPane.setEditorKit (new HTMLEditorKit () {
   @Override
   public ViewFactory getViewFactory () {
    return new HTMLEditorKit.HTMLFactory () {
     @Override
     public View create (Element elem) {
      if (elem.getAttributes ().getAttribute (
        StyleConstants.NameAttribute) == HTML.Tag.INPUT)
       return new FormView (elem) {
        @Override
        protected void submitData (String aStr) { Msg (aStr); }
        @Override
        protected void imageSubmit (String aStr) { Msg (aStr); }
        private void Msg (String aStr)
        {
         JOptionPane.showMessageDialog (getContentPane (), aStr);
        }
       };
      return super.create (elem);
     }
    };
   }
  });
  mHtmlPane.addHyperlinkListener (new HyperlinkListener () {
   @Override
   public void hyperlinkUpdate (HyperlinkEvent aEvt)
   {
    if (aEvt.getEventType () == HyperlinkEvent.EventType.ACTIVATED)
     JOptionPane.showMessageDialog (getContentPane (), aEvt.getDescription ());
   }
  });
 }

 /**
  * Sets the list's visible row count to the number of rows that fit into
  * the viewport height and (re-)installs the list as the viewport view.
  */
 private void LayoutTheList ()
 {
  // getCellBounds may yield null (e.g. empty list), and a zero height
  // would make the division below throw; keep the old row count then.
  Rectangle lCell = mCodepageList.getCellBounds (0, 0);
  if (lCell != null && lCell.height > 0)
   mCodepageList.setVisibleRowCount (
     mCodepagesScrollPane.getViewport ().getHeight () / lCell.height);
  mCodepagesScrollPane.setViewportView (mCodepageList);
 }

 /**
  * Replaces numeric character references for the values 128..255
  * (lower-case hex, upper-case hex and decimal notation) by the character
  * with that value. All other text is left untouched.
  *
  * @param aText text possibly containing such references
  * @return the text with those references replaced
  */
 static String RevertEscapedBytes (String aText)
 {
  String lText = aText;
  for (char lChar = 128; lChar < 256; lChar++)
  {
   String lRaw = Character.toString (lChar);
   String lHex = Integer.toHexString (lChar);
   lText = lText.replace ("&#x" + lHex + ";", lRaw)
     .replace ("&#x" + lHex.toUpperCase () + ";", lRaw)
     .replace ("&#" + Integer.toString (lChar) + ";", lRaw);
  }
  return lText;
 }

 /**
  * Converts text to pure ASCII by replacing every character above 127 with
  * a decimal numeric character reference.
  *
  * <p>The text is processed by Unicode code point, so a surrogate pair
  * yields one reference to the supplementary character rather than two
  * references to the surrogates. An unpaired surrogate is emitted as
  * U+FFFD and counted as unmatched.
  *
  * @param aDecoded the decoded text
  * @return the escaped text together with the character statistics
  */
 static Conversion EscapeNonAscii (CharSequence aDecoded)
 {
  int lCntChar = 0, lCntConv = 0, lCntBad = 0;
  StringBuilder lHtml = new StringBuilder (aDecoded.length ());
  for (int lIdx = 0; lIdx < aDecoded.length (); )
  {
   int lGot = Character.codePointAt (aDecoded, lIdx);
   lIdx += Character.charCount (lGot);
   // codePointAt returns a lone surrogate unchanged
   if (lGot >= Character.MIN_SURROGATE && lGot <= Character.MAX_SURROGATE)
    lGot = 0xFFFD;
   if (lGot > 127)
   {
    lCntConv++;
    lHtml.append ("&#").append (lGot).append (';');
    if (lGot == 0xFFFD)
     lCntBad++;
   }
   else
    lHtml.append ((char) lGot);
   lCntChar++;
  }
  return new Conversion (lHtml.toString (), lCntChar, lCntConv, lCntBad);
 }

 /**
  * Reads the clipboard text, optionally reverts byte-wise escaping, stores
  * the text as bytes in the editor codepage and converts it.
  *
  * <p>On any failure the error is shown via {@link #Alarm} and the
  * previously captured data is neither changed nor converted again.
  *
  * @param aRevert whether to offer replacing numeric character references
  *        for 128..255 by the raw character before encoding
  */
 private void Capture (boolean aRevert)
 {
  String lClip;
  try {
   lClip = (String) Toolkit.getDefaultToolkit ().getSystemClipboard ()
     .getData (DataFlavor.stringFlavor);
  }
  catch (UnsupportedFlavorException aExc) {
   Alarm ("No text found in the clipboard");
   return; // keep the error visible instead of re-processing old data
  }
  catch (Exception aExc) {
   Alarm (aExc.toString ());
   return;
  }
  if (lClip == null)
  {
   Alarm ("No text found in the clipboard");
   return;
  }
  try {
   if (lClip.equals (mClipOut) && JOptionPane.showConfirmDialog (this,
     "That's my own output. Use it anyway?", "Clipboard unchanged",
     JOptionPane.YES_NO_OPTION) != JOptionPane.YES_OPTION)
    return;
   if (aRevert && JOptionPane.showConfirmDialog (this,
     "Change any escaped single byte to its raw character?",
     "Revert accidental encoding", JOptionPane.YES_NO_OPTION)
     == JOptionPane.YES_OPTION)
    lClip = RevertEscapedBytes (lClip);
   mClip = lClip.getBytes (mMyEditorCharset);
  }
  catch (Exception aExc) {
   Alarm (aExc.toString ());
   return;
  }
  Process (0);
 }

 /**
  * Decodes the captured bytes with the selected codepage, escapes all
  * non-ASCII characters, puts the result on the system clipboard, shows it
  * in the HTML pane and updates the status label (green: nothing escaped,
  * amber: something escaped, red: undecodable input found).
  *
  * @param aTextIdxAtTop document position at which the caret is placed
  *        after the pane text has been replaced
  */
 private void Process (int aTextIdxAtTop)
 {
  try {
   Conversion lResult = EscapeNonAscii (
     mCharSet.decode (ByteBuffer.wrap (mClip)));
   mClipOut = lResult.mHtml;

   StringSelection lSel = new StringSelection (mClipOut);
   Toolkit.getDefaultToolkit ().getSystemClipboard ().setContents (lSel, lSel);

   mHtmlPane.setText (mClipOut);
   mHtmlPane.select (aTextIdxAtTop, aTextIdxAtTop);
   Color lColor;
   if (lResult.mCntBad != 0)
    lColor = Color.RED;
   else if (lResult.mCntConv != 0)
    lColor = mAmberColor;
   else
    lColor = mGreenColor;
   mOutput.setForeground (lColor);
   mOutput.setText (Integer.toString (lResult.mCntChar) + " characters, "
     + lResult.mCntConv + " non-ASCII, " + lResult.mCntBad + " unmatched");
  }
  catch (Exception aExc)
  {
   Alarm (aExc.toString ());
  }
 }

 /**
  * Beeps and reports an error: "ERROR" in red in the status label and the
  * message text in the HTML pane.
  *
  * @param aError message to display
  */
 private void Alarm (String aError)
 {
  Toolkit.getDefaultToolkit ().beep ();
  mOutput.setForeground (Color.RED);
  mOutput.setText ("ERROR");
  mHtmlPane.setText ("Error: " + aError);
 }

 /**
  * Creates and shows the window on the AWT event dispatch thread.
  *
  * @param aArgs ignored
  */
 public static void main (String[] aArgs)
 {
  EventQueue.invokeLater (new Runnable () {
   @Override
   public void run ()
   {
    new CodepageTurner ().setVisible (true);
   }
  });
 }
}