Pages

2012-09-05

Divisors.

I've been thinking about how to find divisors of an integer i (supposedly between 10 and 100 or so) manually and as effortlessly as possible.
I wrote the code below to check if the results are correct by comparison with a simple brute-force way.
In hand calculation, instead of computing math.sqrt(i) you find the integer a that satisfies $a^2 \le i < (a+1)^2$; only candidates up to a need to be tried.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# divisors.py - returns list of divisors
import math, sys, traceback

def check1by1(int_num): # check all numbers from 1 to int_num.
    """Brute-force reference: test every integer from 1 up to |int_num|.

    Returns the divisors of int_num in ascending order. The sign of
    int_num is ignored. Returns None when int_num is 0.
    """
    magnitude = abs(int_num)
    if magnitude == 0:
        return None
    return [cand for cand in range(1, magnitude + 1) if magnitude % cand == 0]

def getdivisors(int_num): # avoid unnecessary calculation.
    """Return list of divisors of int_num. (None if int_num == 0.)

    The sign of int_num is ignored. Only candidates up to
    int(math.sqrt(int_num)) are tried; each hit i also yields its
    cofactor int_num // i. The result is sorted in ascending order and
    contains only integers.
    """
    int_num = abs(int_num)
    if int_num == 0:
        return None
    if int_num == 1:
        return [1]
    div_list = [1, int_num]
    if int_num % 2 != 0:
        # An odd number has no even divisors, so skip every even candidate.
        start, step = 3, 2
    else:
        start, step = 2, 1
    for i in range(start, int(math.sqrt(int_num)) + 1, step):
        if int_num % i == 0:
            div_list.append(i)
            # Floor division keeps the cofactor an int; "/" would yield a
            # float under true division (Python 3).
            cofactor = int_num // i
            if cofactor != i:  # perfect square: do not list the root twice
                div_list.append(cofactor)
    return sorted(div_list)

if __name__ == '__main__':
    # Command-line self-check: compute the divisors of argv[1] both ways and
    # report any disagreement between the two implementations.
    try:
        num = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-integer argument: show usage instead of a traceback.
        print('usage: python divisors.py integer')
        sys.exit(0)
    simple = check1by1(num)
    print('check1by1  ', num, simple)
    shortcut = getdivisors(num)
    print('getdivisors', num, shortcut)
    if simple != shortcut: # Error checking.
        print('Error.')

Update, September 7, 11:43 p.m. JST: xrange() instead of range(), benchmark().
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# divisors.py - returns list of divisors
import math, sys, time

def benchmark(int_num, fn):
    """Call fn(int_num) and time it.

    Returns a (result, elapsed_seconds) tuple, where result is whatever
    fn returned.
    """
    # time.clock() was removed in Python 3.8. Use time.perf_counter() where
    # it exists and fall back to time.clock() on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    result = fn(int_num)
    elapsed_time = timer() - start_time
    return result, elapsed_time

def check1by1(int_num): # check all numbers from 1 to int_num.
    """Brute-force reference: test every integer from 1 up to |int_num|.

    Returns the divisors in ascending order, or None when int_num is 0.
    """
    try:
        lazy_range = xrange  # Python 2: iterate without building a list
    except NameError:
        lazy_range = range   # Python 3: xrange is gone, range is already lazy
    int_num = abs(int_num)
    if int_num == 0:
        return None
    return [i for i in lazy_range(1, int_num + 1) if int_num % i == 0]

def getdivisors(int_num): # avoid unnecessary calculation.
    """Return list of divisors of int_num. (None if int_num == 0.)

    The sign of int_num is ignored. Candidates are tried only up to
    int(math.sqrt(int_num)); every divisor found also contributes its
    cofactor. The result is sorted ascending and holds only integers.
    """
    try:
        lazy_range = xrange  # Python 2: iterate without building a list
    except NameError:
        lazy_range = range   # Python 3: xrange is gone, range is already lazy
    int_num = abs(int_num)
    if int_num == 0:
        return None
    if int_num == 1:
        return [1]
    div_list = [1, int_num]
    if int_num % 2 != 0:
        # An odd number has no even divisors, so skip every even candidate.
        start, step = 3, 2
    else:
        start, step = 2, 1
    for i in lazy_range(start, int(math.sqrt(int_num)) + 1, step):
        if int_num % i == 0:
            div_list.append(i)
            # Floor division keeps the cofactor an int under true division.
            div_result = int_num // i
            if i != div_result:  # perfect square: do not list the root twice
                div_list.append(div_result)
    return sorted(div_list)

if __name__ == '__main__':
    # Command-line self-check: time both implementations on argv[1], flag any
    # disagreement, and print results plus the timing difference.
    try:
        num = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-integer argument: show usage instead of a traceback.
        print('usage: python divisors.py integer')
        sys.exit(0)
    # Each benchmark() call returns a (result, elapsed_seconds) pair.
    simple = benchmark(num, check1by1)
    shortcut = benchmark(num, getdivisors)
    if simple[0] != shortcut[0]: # Error checking.
        print('Error.')
    print('check1by1  ', num, simple)
    print('getdivisors', num, shortcut)
    print('time difference: ' + str(simple[1] - shortcut[1]) + ' sec.')

2012-09-02

Python: Filtering Delicious bookmarks by tags

bmconv.py is required.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# bmfilter.py - Filter bookmarks by tags.
import bmconv

def tagfilter(tags_list):
    """Return a list of bookmark dictionaries that carry every tag in tags_list.

    Bookmarks are obtained from bmconv.main(); each one is kept when
    tags_list is a subset of its 'tags' entry.
    """
    wanted = set(tags_list)  # built once instead of once per bookmark
    # A list comprehension returns a real list on Python 2 and 3 alike;
    # filter() would return a lazy iterator on Python 3.
    return [bm for bm in bmconv.main() if wanted <= set(bm['tags'])]

if __name__ == '__main__':
    # Demo: show the bookmarks tagged with both of these tags.
    demo_tags = ['book', 'history']
    matches = tagfilter(demo_tags)
    print(matches)
The above example outputs only bookmarks that have both 'book' and 'history' tags.

Update, September 2, 8:41 p.m. JST: AND/OR filters.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# bmfilter.py - Filter bookmarks by tags.
import bmconv

def andfilter(tags_list):
    """Return a list of bookmarks that have all the tags in tags_list.

    Bookmarks are obtained from bmconv.main().
    """
    wanted = set(tags_list)  # built once instead of once per bookmark
    # A comprehension yields a real list on Python 3, where filter() is lazy.
    return [bm for bm in bmconv.main() if wanted <= set(bm['tags'])]

def orfilter(tags_list):
    """Return a list of bookmarks that have any of the tags in tags_list.

    Bookmarks are obtained from bmconv.main().
    """
    wanted = set(tags_list)  # built once instead of once per bookmark
    # A comprehension yields a real list on Python 3, where filter() is lazy.
    # "not isdisjoint" is true exactly when the intersection is non-empty.
    return [bm for bm in bmconv.main() if not wanted.isdisjoint(bm['tags'])]

if __name__ == '__main__':
    # Alternative demo (AND filter): print(andfilter(['book', 'history']))
    demo_tags = ['cd', 'dvd']
    matches = orfilter(demo_tags)
    print(matches)

Update, September 3, 6:59 p.m. JST: Changed function name and arguments. Added case sensitivity switch.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# bmfilter.py - Filter bookmarks by tags.
import bmconv

def tagfilter(tags_list, bookmarks_list, filter_type_and=True, ignore_case=True):
    """Return a list of dictionaries of bookmarks filtered from bookmarks_list by tags in tags_list.

    tags_list       -- tags to look for.
    bookmarks_list  -- iterable of bookmark dictionaries; each must have a
                       'tags' entry holding an iterable of strings.
    filter_type_and -- True: keep bookmarks having ALL of tags_list (AND).
                       False: keep bookmarks having ANY of tags_list (OR).
    ignore_case     -- True: compare tags after upper-casing both sides.
                       False: compare tags exactly as given.

    The input order of bookmarks_list is preserved.
    """

    def proc_case(str_list):
        """Return str_list as a set, upper-cased when ignore_case is on."""
        if ignore_case:
            return set(s.upper() for s in str_list)
        return set(str_list)

    # Normalise the wanted tags once rather than once per bookmark.
    wanted = proc_case(tags_list)
    # Comprehensions return real lists on Python 3, where filter() is lazy.
    if filter_type_and: # AND
        return [bm for bm in bookmarks_list if wanted <= proc_case(bm['tags'])]
    # OR: keep the bookmark when the two tag sets share at least one element.
    return [bm for bm in bookmarks_list if wanted & proc_case(bm['tags'])]

if __name__ == '__main__':
    # Parse the bookmark file once and reuse the list for every demo query.
    bookmarks = bmconv.main()
    print(tagfilter(['book', 'history'], bookmarks))
##      bookmarks that have both tags 'book' and 'history'
##      (case insensitive. also matches 'Book', 'History', etc.)
    print(tagfilter(['cd', 'dvd'], bookmarks, False))
##      bookmarks that have any of tags 'cd' and 'dvd'
##      (case insensitive. also matches 'CD', 'Cd', 'DVD', 'Dvd', etc.)
    print(tagfilter(['Apple'], bookmarks, True, False))
##      doesn't match 'apple'.

Python: Converting Delicious bookmarks.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# bmconv.py - Read Delicious.com bookmark file and convert it into a list of dictionaries.
import re

bookmark_file = 'delicious.html'

def main(path=None):
    """Return a list of dictionaries of bookmarks.

    path -- bookmark file to read; defaults to the module-level
            bookmark_file when omitted.

    Every line starting with '<DT>' produces one dictionary with the keys
    'url', 'add_date', 'private', 'tags' (the TAGS attribute split on
    commas) and 'title'. If the line right after it starts with '<DD>',
    a 'description' key is added: it joins that line (without the '<DD>'
    prefix) and all following lines up to the next '<DT>' line or the end
    of the file with newlines.

    Raises AttributeError if a '<DT>' line lacks one of the expected
    attributes or the <A ...>...</A> element.
    """
    if path is None:
        path = bookmark_file
    with open(path, 'r') as f:
        lines_list = f.readlines()
    line_count = len(lines_list)
    entries_list = []
    for idx, line in enumerate(lines_list):
        if not line.startswith('<DT>'):
            continue
        entry = {}
        entry['url'] = re.match(r'^.*HREF=\"([^\"]+)\"', line).group(1)
        entry['add_date'] = re.match(r'^.*ADD_DATE=\"([^\"]+)\"', line).group(1)
        entry['private'] = re.match(r'^.*PRIVATE=\"([^\"]*)\"', line).group(1)
        entry['tags'] = re.match(r'^.*TAGS=\"([^\"]*)\"', line).group(1).split(',')
        entry['title'] = re.match(r'^.*<A [^>]+>(.*)</A>', line).group(1)
        pos = idx + 1
        # Bounds checks replace the old bare "except: pass" and also cover a
        # '<DT>' entry that is the very last line of the file.
        if pos < line_count and lines_list[pos].startswith('<DD>'):
            dd_tmp = []
            while pos < line_count and not lines_list[pos].startswith('<DT>'):
                # Group 2 is the line text minus an optional leading '<DD>'
                # and minus the trailing newline.
                dd_tmp.append(re.match(r'^(<DD>)?(.*)$', lines_list[pos]).group(2))
                pos += 1
            entry['description'] = '\n'.join(dd_tmp)
        entries_list.append(entry)
    return entries_list

if __name__ == '__main__':
    # Run as a script: dump the parsed bookmark list.
    parsed_bookmarks = main()
    print(parsed_bookmarks)

Download bmconv.py from Google Drive

2012-08-26

Using PyRSS2Gen in time zones other than GMT

PyRSS2Gen

--- PyRSS2Gen.py.orig 2012-08-25 10:22:14.292968887 +0000
+++ PyRSS2Gen.py 2012-08-25 13:38:58.838836110 +0000
@@ -57,12 +57,13 @@
     # Isn't there a standard way to do this for Python?  The
     # rfc822 and email.Utils modules assume a timestamp.  The
     # following is based on the rfc822 module.
-    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
+    return ("%s, %02d %s %04d %02d:%02d:%02d %s" % (
             ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
             dt.day,
             ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
-            dt.year, dt.hour, dt.minute, dt.second)
+            dt.year, dt.hour, dt.minute, dt.second,
+            dt.strftime('%z'))).strip()
 
         
 ##

Update, August 28, 2:07 p.m. JST: Output timezone in any case ('GMT' by default).
--- PyRSS2Gen.py.orig 2012-08-25 10:22:14.292968887 +0000
+++ PyRSS2Gen.py 2012-08-28 04:31:25.535324750 +0000
@@ -57,12 +57,15 @@
     # Isn't there a standard way to do this for Python?  The
     # rfc822 and email.Utils modules assume a timestamp.  The
     # following is based on the rfc822 module.
-    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
+    tz = 'GMT' # default timezone
+    if dt.tzinfo:
+        tz = dt.strftime('%z')
+    return "%s, %02d %s %04d %02d:%02d:%02d %s" % (
             ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
             dt.day,
             ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
-            dt.year, dt.hour, dt.minute, dt.second)
+            dt.year, dt.hour, dt.minute, dt.second, tz)
 
         
 ##

Update, September 1, 5:06 p.m. JST: Simplified the patch.
--- PyRSS2Gen.py.orig 2012-08-25 10:22:14.292968887 +0000
+++ PyRSS2Gen.py 2012-09-01 07:16:36.291449545 +0000
@@ -57,12 +57,13 @@
     # Isn't there a standard way to do this for Python?  The
     # rfc822 and email.Utils modules assume a timestamp.  The
     # following is based on the rfc822 module.
-    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
+    return "%s, %02d %s %04d %02d:%02d:%02d %s" % (
             ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
             dt.day,
             ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
-            dt.year, dt.hour, dt.minute, dt.second)
+            dt.year, dt.hour, dt.minute, dt.second,
+            dt.strftime('%z') if dt.tzinfo else 'GMT') # default timezone is 'GMT'
 
         
 ##

2012-08-05

Making ruby refe output without garble on UTF-8 terminal

Edit refe-1_9_3 as follows:

#!/bin/sh
# Launcher: runs bitclust/bin/refe through ruby against the db-1_9_3 database.
# Change to this script's own directory first so the relative paths below resolve.
cd "`dirname "$0"`"
# '-e w' is the added refe option; presumably it selects the output encoding
# used on a UTF-8 terminal -- confirm against refe's option help.
exec ruby -Ke -I bitclust/lib bitclust/bin/refe -d db-1_9_3 -e w "$@"

(add '-e w' as an option to refe)

2012-01-03

Patch for test_commands.py in Python 2.5.6

Something seems to be wrong with a test script in Python 2.5.6.

Environment:

  • CentOS 6.2 x86_64 on VMware
  • Python 2.5.6

Symptom:

  • make test fails.

Solution:

  • Apply the patch available here on this issue or the following (patch on my Google Docs) to Lib/test/test_commands.py.


--- Lib/test/test_commands.py.orig 2006-06-29 04:10:08.000000000 +0000
+++ Lib/test/test_commands.py 2012-01-02 11:19:28.535171924 +0000
@@ -46,11 +46,7 @@
         #     drwxr-xr-x   15 Joe User My Group     4096 Aug 12 12:50 /
         # Note that the first case above has a space in the group name
         # while the second one has a space in both names.
-        pat = r'''d.........   # It is a directory.
-                  \+?          # It may have ACLs.
-                  \s+\d+       # It has some number of links.
-                  [^/]*        # Skip user, group, size, and date.
-                  /\.          # and end with the name of the file.
+        pat = r'''^.*(\/\.)[\ ]*[\n\r]*$
                '''
 
         self.assert_(re.match(pat, getstatus("/."), re.VERBOSE))


Tips for building Python 2.5.6 with SSL on CentOS 6.2 (x86_64)

You need to edit the following files because of the header and library paths on 64-bit CentOS.
  • Modules/Setup.dist (Python 2.5.6)
  • setup.py (ssl package)
First, edit Modules/Setup.dist as follows (patch file on Google Docs page):

--- Modules/Setup.dist.orig 2006-08-06 07:26:21.000000000 +0000
+++ Modules/Setup.dist 2012-01-02 16:09:27.904863909 +0000
@@ -203,10 +203,10 @@
 
 # Socket module helper for SSL support; you must comment out the other
 # socket line above, and possibly edit the SSL variable:
-#SSL=/usr/local/ssl
-#_ssl _ssl.c \
-# -DUSE_SSL -I$(SSL)/include -I$(SSL)/include/openssl \
-# -L$(SSL)/lib -lssl -lcrypto
+SSL=/usr
+_ssl _ssl.c \
+ -DUSE_SSL -I$(SSL)/include -I$(SSL)/include/openssl \
+ -L$(SSL)/lib64 -lssl -lcrypto
 
 # The crypt module is now disabled by default because it breaks builds
 # on many systems (where -lcrypt is needed), e.g. Linux (I believe).

Then build Python 2.5.6 as usual.

Next, you may need the ssl package from PyPI.
Edit setup.py as follows (patch file on Google Docs page):

--- setup.py.orig 2009-07-28 00:45:12.000000000 +0000
+++ setup.py 2012-01-02 16:40:09.447439694 +0000
@@ -130,7 +130,8 @@
             ssl_incs += krb5_h
 
     ssl_libs = find_library_file(compiler, 'ssl',
-                                 ['/usr/lib'],
+                                 ['/usr/lib',
+                                  '/usr/lib64'],
                                  ['/usr/local/lib',
                                   '/usr/local/ssl/lib',
                                   '/usr/contrib/ssl/lib/'

Then, build the package with make.

cf. Getting SSL Support in Python 2.5.1