Pages

2013-09-05

2013-04-15

subdivisionjp.R

I made an R version of the conversion tool for the names of the principal subdivisions of Japan.

subdivisionjp.zip (Download page at Google Drive)

Documentation
http://rpubs.com/ogura/5493

(Update: 2013-04-16 22:50 JST)
I didn't know about the Nippon package, which includes a table of prefectures and many other useful features.
I wonder whether Unicode data should be stored as escaped code points like "<U+6771><U+4EAC><U+90FD>" instead of as literal characters like "東京都".
If the original data is stored as escaped code points like "<U+6771><U+4EAC><U+90FD>", the R console outputs the text in a readable way, but the original data itself is less human-readable.
If the original data is stored as literal characters like "東京都" (I don't know the name of this representation), the original data is human-readable, but the console outputs garbled text.

Converting principal subdivision names in Japan

When handling data sets, Japanese characters are almost always a pain in the neck because of garbled text in data-processing software such as R.
Replacing Japanese characters with Roman notations manually is a very tedious task that many people may not want to do.
For commonly used names such as those of principal subdivisions, or prefectures (the counterparts of states in the United States), it would be handy to be able to get their Roman notations quickly.
So I made a script to convert names of Japan's principal subdivisions between Japanese (Kanji) and Roman.

iso3166-2jp (Google Apps Script. I don't know why, but Google sign-in is required to view the code though I'm sharing it for anonymous access.)

If you make a copy of the above script and include it as a library, you can use its functions in your Google Sheets.



Japan's principal subdivisions:
http://en.wikipedia.org/wiki/ISO_3166-2:JP
http://ja.wikipedia.org/wiki/ISO_3166-2:JP

2013-03-21

JavaScript: Feedly feeds extractor bookmarklet

(1st update, March 21, 2013, 15:20 JST: include feed names.)
(2nd update, March 21, 2013, 21:58 JST: bug fix and JSON/OPML option added.)
(3rd update, March 21, 2013, 22:17 JST: change names of some keys in JSON.)

Feedly is a great feed reader, but it doesn't seem to have an export function.

So I created a bookmarklet to extract URLs of your feed subscriptions and their categories and output them in JSON/OPML format.

Feedly feeds extractor bookmarklet

This bookmarklet only works at your Feedly index page (http://www.feedly.com/home#index).

After running this code, a text area containing the JSON or OPML data (whichever you choose in the dialog) will be created at the bottom of the page.

Pretty-printed version:

// Feedly feeds extractor bookmarklet by Toshiyuki Ogura
// Run this code at http://www.feedly.com/home#index
// A text area containing your feed URLs in JSON or OPML format (your choice) will be created at the bottom of the page.

javascript:(function(){

    if (window.location.href != 'http://www.feedly.com/home#index') {
    } else {

        /**
         * Evaluate an XPath expression and return the matching nodes as a plain array.
         *
         * @param {string} expr - XPath expression to evaluate.
         * @param {Document} [doc] - Document whose evaluate() is used. When omitted it is
         *     derived from `context`, or falls back to the global `document`.
         * @param {Node} [context] - Node the expression is evaluated against. Defaults to `doc`.
         * @param {Function|Object} [resolver] - Namespace resolver. Defaults to null.
         * @returns {Array} The nodes of the ordered snapshot, in snapshot order.
         */
        function docEvaluateArray (expr, doc, context, resolver) {
            var ownerDoc = doc;
            if (!ownerDoc) {
                // A Document node has ownerDocument === null, so when the context
                // is itself a document it has to serve as the evaluating document.
                ownerDoc = context ? (context.ownerDocument || context) : document;
            }
            var contextNode = context ? context : ownerDoc;
            var nsResolver = resolver ? resolver : null;

            var snapshot = ownerDoc.evaluate(expr, contextNode, nsResolver, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
            var nodes = [];
            for (var i = 0; i < snapshot.snapshotLength; i++) {
                nodes.push(snapshot.snapshotItem(i));
            }
            return nodes;
        }

        /*
        // This function doesn't work because of HTMLUnknownElement.
        function jsonToOPMLString (jsonObj) {
            var xmlDeclaration = '<?xml version="1.0" encoding="utf-8"?>';
            var opmlElement = document.createElement('opml');
            opmlElement.setAttribute('version', '1.0');
            var headElement = opmlElement.createElement('head');
            headElement.createElement('title');
            headElement.createElement('dateCreated');
            headElement.createElement('dateModified');
            headElement.createElement('ownerName');
            headElement.createElement('ownerEmail');
            var bodyElement = opmlElement.createElement('body');
            for (var i = 0; i < jsonObj.length; i++) {
                var categoryOutline = bodyElement.createElement('outline');
                categoryOutline.setAttribute('text', jsonObj[i]['categoryName']);
                for (var j = 0; j < jsonObj[i]['feeds'].length; j++) {
                    var feedObj = jsonObj[i]['feeds'][j];
                    var urlOutline = categoryOutline.createElement('outline');
                    urlOutline.setAttribute('text', feedObj['title']);
                    urlOutline.setAttribute('type', 'link');
                    urlOutline.setAttribute('xmlUrl', feedObj['xmlUrl']);
                }
            }
            return xmlDeclaration + opmlElement.innerHTML;
        }
        */

        /**
         * Serialize the extracted feed groups as a pretty-printed OPML document.
         *
         * @param {Array} jsonObj - Array of groups; each group is read as
         *     { category: string, feeds: [ { title: string, xmlUrl: string } ] }.
         * @returns {string} The XML declaration followed by the <opml> element.
         */
        function jsonToOPMLString (jsonObj) {
            var newLine = '\n'; // for pretty-printing

            // Make a value safe for use as an attribute value or element text.
            // Ampersands that already start an entity or character reference are
            // left alone: category names handed to this function are read from
            // innerHTML and may therefore already be entity-encoded.
            function escapeXml(value) {
                return String(value)
                    .replace(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/g, '&amp;')
                    .replace(/</g, '&lt;')
                    .replace(/>/g, '&gt;')
                    .replace(/"/g, '&quot;');
            }

            // Build one element.
            //   text      - nested markup (recognised by a leading '<'), plain text, or empty
            //   attrArray - [['attr1', 'val1'], ['attr2', 'val2']]
            function getTagString(tagName, text, attrArray) {
                var openTag = '<' + tagName;
                if (attrArray) {
                    var attrStringArray = [];
                    for (var k = 0; k < attrArray.length; k++) {
                        attrStringArray.push(attrArray[k][0] + '="' + escapeXml(attrArray[k][1]) + '"');
                    }
                    openTag += ' ' + attrStringArray.join(' ');
                }
                if (!text) {
                    return openTag + '/>' + newLine;
                }
                if (text.charAt(0) == '<') {
                    // Child elements: put them on their own lines, unescaped.
                    return openTag + '>' + newLine + text + '</' + tagName + '>' + newLine;
                }
                return openTag + '>' + escapeXml(text) + '</' + tagName + '>' + newLine;
            }

            var categoryArray = [];

            for (var i = 0; i < jsonObj.length; i++) {
                var group = jsonObj[i];
                var feedsArray = [];
                for (var j = 0; j < group['feeds'].length; j++) {
                    var feed = group['feeds'][j];
                    feedsArray.push(getTagString('outline', undefined,
                                                 [ [ 'text',   feed['title']  ],
                                                   [ 'title',  feed['title']  ],
                                                   [ 'type',   'rss'          ],
                                                   [ 'xmlUrl', feed['xmlUrl'] ] ]));
                }
                categoryArray.push(getTagString('outline', feedsArray.join(''),
                                                [ [ 'text',  group['category'] ],
                                                  [ 'title', group['category'] ] ]));
            }

            var nowTimeString = new Date().toUTCString();
            var headTag = getTagString('head',
                                       getTagString('title', 'My Feedly feeds') +
                                       getTagString('dateCreated', nowTimeString) +
                                       getTagString('dateModified', nowTimeString) +
                                       getTagString('ownerName', 'My name') +
                                       getTagString('ownerEmail', 'My email'));
            var bodyTag = getTagString('body', categoryArray.join(''));
            // The OPML root element requires a version attribute.
            var opmlTag = getTagString('opml', headTag + bodyTag, [ [ 'version', '1.0' ] ]);
            var xmlDeclaration = '<?xml version="1.0" encoding="utf-8"?>' + newLine;
            return xmlDeclaration + opmlTag;
        }

        // Gather the child nodes of every "cell" column of the main area into one
        // flat list. The loop starts from an empty list so that a page with no
        // matching columns yields an empty result instead of a TypeError.
        var bulk = docEvaluateArray("//div[@id='mainArea']/div[starts-with(@class,'cell')]");
        var items = [];
        for (var i = 0; i < bulk.length; i++) {
            items = items.concat(Array.prototype.slice.call(bulk[i].childNodes));
        }

        // Walk the flat list: an H2 starts a new category, a DIV carrying a
        // data-uri attribute is one feed of the current category.
        var groupsArray = [];
        var categoryObj = {};
        var feedsArray = [];
        for (var j = 0; j < items.length; j++) {
            var node = items[j];
            if (node.nodeName == 'H2') {
                if (categoryObj['category'] !== undefined) {
                    // Close the previous category before opening the next one.
                    categoryObj['feeds'] = feedsArray;
                    groupsArray.push(categoryObj);
                    categoryObj = {};
                    feedsArray = [];
                }
                categoryObj['category'] = node.innerHTML.trim();
            } else if (node.nodeName == 'DIV') {
                var feedUri = node.getAttribute('data-uri');
                if (feedUri === null) {
                    continue; // no data-uri, so no feed URL can be derived from this DIV
                }
                // NOTE(review): assumes the third child node is the text node that
                // holds the feed title - confirm against the current Feedly markup.
                var feedObj = {'title' : node.childNodes[2].nodeValue.trim(),
                               'xmlUrl' : feedUri.replace('subscription/feed/','') };
                feedsArray.push(feedObj);
            }
        }
        // Flush the group still being built when the list ends; without this the
        // last category and its feeds would be missing from the output.
        if (categoryObj['category'] !== undefined || feedsArray.length > 0) {
            if (categoryObj['category'] === undefined) {
                categoryObj['category'] = ''; // feeds that were never preceded by an H2
            }
            categoryObj['feeds'] = feedsArray;
            groupsArray.push(categoryObj);
        }

        // Output area appended to the page; falls back to <body> when the
        // expected container is not present.
        var result = document.createElement('textarea');
        result.setAttribute('rows', '10');
        result.setAttribute('cols', '100%');
        (document.querySelector('div#mainBar') || document.body).appendChild(result);

        var choice = confirm('Press OK to get JSON,\nCancel to get OPML');
        if (choice) {
            result.value = JSON.stringify(groupsArray, undefined, 2);
        } else {
            result.value = jsonToOPMLString(groupsArray);
        }

    }

})();