Logo Search packages:      
Sourcecode: faumachine version File versions  Download package

check_license.py

#!/usr/bin/python

# $Id: check_license.py,v 1.22 2009-02-26 17:46:16 potyra Exp $
#
# Copyright (C) 2009 FAUmachine Team <info@faumachine.org>.
# This program is free software. You can redistribute it and/or modify it
# under the terms of the GNU General Public License, either version 2 of
# the License, or (at your option) any later version. See COPYING.

import sys
import string
import re
import os
import struct

# number of lines scanned at the head of each source file
SOURCE_LINES = 50

# directory names that are never traversed, wherever they show up
BLACKLISTED_DIRS = (
    "CVS",    # cvs specific
    ".deps",  # created by automake
)

# directories skipped only at exactly these paths
BLACKLISTED_PATHDIRS = (
    "./autom4te.cache",  # generated by autoconf
    "./experiments",  # FIXME
    "./scripts/test-FAUmachine/test-engine/templates",  # FIXME
    "./node-pc/keymaps",  # FIXME
)

# file names that are skipped in whatever directory they appear
BLACKLISTED_FILES = (
    ".cvsignore",  # cvs specific
    "README",  # must have at least 2 files in this list
)

# files skipped only at exactly these paths
BLACKLISTED_PATHFILES = (
    "./copyright",  # that's the destination file
    "./AUTHORS",  # only an informational text file
    "./NEWS",  # our changelog file
    "./README.bsd",  # informational text file
    "./README.macosx",  # informational text file
    "./STATE",  # informational text file
    "./TODO",  # informational text file
    "./COPYING",  # GPL itself
    "./COPYING.LIB",  # LGPL itself
    "./stamp-h1",  # generated, empty
    "./INSTALL",  # informational text file
    "./scripts/install-sh",  # installed by autotools automatically
    "./doc/IO_PORTS.txt",  # informational text file
    "./doc/CODINGSTYLE",  # informational text file
    "./doc/faum.desktop",  # TODO can desktop files have comments?
)

class CopyrightHolder:
    """ one copyright holder as named on a copyright line.

        Two holders are considered identical if last name and first name
        match; e-mail address and years take no part in comparison or
        hashing. Holders sort by last name, then first name.
    """

    def __init__(self, firstname, lastname, email, year1, year2):
        """ c'tor.
            firstname, lastname, email: strings (or None)
            year1, year2: first/last year; anything int() accepts, or None
        """
        self._firstname = firstname
        self._lastname = lastname
        self._email = email
        self._year1 = None if year1 is None else int(year1)
        self._year2 = None if year2 is None else int(year2)

    def isFAUmachineTeam(self):
        """ is this holder the collective "FAUmachine Team"? """
        return (self._firstname == "FAUmachine") \
           and (self._lastname == "Team")

    def __str__(self):
        """ render as "Copyright (c) Y1-Y2 by First Last <email>",
            leaving out every part that is None
        """
        parts = ["Copyright (c)"]
        if self._year1 is not None:
            parts.append(" %d" % self._year1)
        if self._year2 is not None:
            parts.append("-%d" % self._year2)
        parts.append(" by")
        for field in (self._firstname, self._lastname, self._email):
            if field is not None:
                parts.append(" %s" % field)
        return "".join(parts)

    def _sortKey(self):
        """ the tuple that defines identity and ordering """
        return (self._lastname, self._firstname)

    # Rich comparisons: Python 3 never calls __cmp__, so without these
    # equal holders would compare unequal and could not be sorted.
    def __eq__(self, other):
        if not isinstance(other, CopyrightHolder):
            return NotImplemented
        return self._sortKey() == other._sortKey()

    def __ne__(self, other):
        # Python 2 does not derive != from ==
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __lt__(self, other):
        if not isinstance(other, CopyrightHolder):
            return NotImplemented
        return self._sortKey() < other._sortKey()

    def __le__(self, other):
        if not isinstance(other, CopyrightHolder):
            return NotImplemented
        return self._sortKey() <= other._sortKey()

    def __gt__(self, other):
        if not isinstance(other, CopyrightHolder):
            return NotImplemented
        return self._sortKey() > other._sortKey()

    def __ge__(self, other):
        if not isinstance(other, CopyrightHolder):
            return NotImplemented
        return self._sortKey() >= other._sortKey()

    def __cmp__(self, other):
        """ three-way comparison, kept for explicit callers; written
            without the cmp() builtin so it also runs on Python 3
        """
        mine, theirs = self._sortKey(), other._sortKey()
        return (mine > theirs) - (mine < theirs)

    def __hash__(self):
        # hash exactly what __eq__ compares; a tuple also copes with
        # None names, which string concatenation did not
        return hash(self._sortKey())


class LicensedFileBase:
    """ base class for all licensed files.

        Holds the path, the copyright holders found, a license shortcut
        ("GPL-2+", "LGPL-2+" or "unknown"; "" before _process() ran) and
        whether the file looks generated. Two objects are equal if license
        shortcut and (sorted) holders match; the path takes no part in
        comparison or hashing.
    """

    # one copyright line:
    #   Copyright (c) [YEAR[-YEAR]] [by] First Last [<email>]
    # compiled once instead of on every _findHolders() call
    _HOLDER_RE = re.compile(r"""
        [Cc]opyright[ ]
        \([cC]\)
        (?:[ ](?P<year1>[\d]+))?
        (?:-(?P<year2>[\d]+))?
        (?:[ ]by)?
        [ ]
        (?P<firstname>[\w]+)
        [ ]
        (?P<lastname>[\w]+)
        (?:[ ](?P<email><[\w]+@[\w.]+>))?
        """, re.VERBOSE)
    # marker phrases of generated files
    _GENERATED_RE = re.compile(r"([gG]enerated (?:by|from)|Generator:){1}")
    # comment decoration characters dropped by _sanitizeText()
    _DECORATION_RE = re.compile(r"[#/*]")
    # any run of whitespace
    _WHITESPACE_RE = re.compile(r"[\s]+")

    def __init__(self, path):
        """ c'tor. Path: path to file to check """
        # path to file
        self._path = path
        # copyright holders list
        self._holders = []
        # license shortcut
        self._shortcut = ""
        # is it a generated file?
        self._generated = False

    def getPath(self):
        """ get pathname to file """
        return self._path

    def isStandard(self):
        """ is this file authored solely by FAUmachine AUTHORS and
            distributable under GPL-2+?
        """
        if     (self._shortcut == "GPL-2+") \
           and (len(self._holders) == 1) \
           and (self._holders[0].isFAUmachineTeam()):
            return True

        return False

    def getLicenseShortcut(self):
        """ returns the license shortcut (if any)
        """
        return self._shortcut

    def isGenerated(self):
        """ returns True if the file is generated through another file
        """
        return self._generated

    def _process(self, txt):
        """ set all members by evaluating the textual license txt
        """
        self._findHolders(txt)

        if LicensedFileBase._isGPL2P(txt):
            self._shortcut = "GPL-2+"
        elif LicensedFileBase._isLGPL2P(txt):
            self._shortcut = "LGPL-2+"
        else:
            self._shortcut = "unknown"

        self._generated = LicensedFileBase._isGenerated(txt)

    def _findHolders(self, txt):
        """ append a CopyrightHolder for every copyright line in txt
        """
        for m in LicensedFileBase._HOLDER_RE.finditer(txt):
            self._holders.append(CopyrightHolder(**m.groupdict()))

    def _sortKey(self):
        """ the tuple that defines identity and ordering """
        return (self._shortcut, sorted(self._holders))

    # Rich comparisons: Python 3 never calls __cmp__, so without these
    # the objects could neither be grouped as dict keys nor sorted.
    def __eq__(self, other):
        if not isinstance(other, LicensedFileBase):
            return NotImplemented
        return self._sortKey() == other._sortKey()

    def __ne__(self, other):
        # Python 2 does not derive != from ==
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __lt__(self, other):
        if not isinstance(other, LicensedFileBase):
            return NotImplemented
        return self._sortKey() < other._sortKey()

    def __le__(self, other):
        if not isinstance(other, LicensedFileBase):
            return NotImplemented
        return self._sortKey() <= other._sortKey()

    def __gt__(self, other):
        if not isinstance(other, LicensedFileBase):
            return NotImplemented
        return self._sortKey() > other._sortKey()

    def __ge__(self, other):
        if not isinstance(other, LicensedFileBase):
            return NotImplemented
        return self._sortKey() >= other._sortKey()

    def __cmp__(self, other):
        """ comparison method (three-way), kept for explicit callers;
            written without the cmp() builtin so it also runs on Python 3
        """
        mine, theirs = self._sortKey(), other._sortKey()
        return (mine > theirs) - (mine < theirs)

    def __str__(self):
        """ one indented line per holder followed by the license line """
        lines = ["  %s\n" % h for h in self._holders]
        lines.append("  License: %s." % self._shortcut)
        return "".join(lines)

    def __hash__(self):
        # XOR is order independent, matching the sorted comparison
        h1 = hash(self._shortcut)
        for holder in self._holders:
            h1 ^= hash(holder)

        return h1

    @staticmethod
    def _isGPL2P(txt):
        """ does txt contain the GPL "version 2 or later" wording? """
        t1 = r"GNU General Public License"
        t2 = r"either version 2 of"
        t3 = r"or (at your option) any later version"

        return (t1 in txt) and (t2 in txt) and (t3 in txt)

    @staticmethod
    def _isLGPL2P(txt):
        """ does txt contain the LGPL "version 2 or later" wording? """
        t1 = r"GNU " + "Lesser General Public License"
        t2 = r"either version 2 of"
        t3 = r"or (at your option) any later version."

        return (t1 in txt) and (t2 in txt) and (t3 in txt)

    @staticmethod
    def _isGenerated(txt):
        """ does txt contain a "generated by/from" or "Generator:" marker?
        """
        return LicensedFileBase._GENERATED_RE.search(txt) is not None

    @staticmethod
    def _sanitizeText(txt):
        """ drop the comment characters '#', '/' and '*' from txt and
            replace all subsequent whitespace with one space, so that
            phrases wrapped over several comment lines can be matched.
        """
        # re.sub instead of string.maketrans/translate(table, delchars),
        # which only exists for Python 2 byte strings
        t = LicensedFileBase._DECORATION_RE.sub("", txt)
        return LicensedFileBase._WHITESPACE_RE.sub(" ", t)

      


class LicensedTextFile(LicensedFileBase):
    """ class for all textual files """

    def __init__(self, path):
        """ c'tor. Scans the file at path right away. """
        LicensedFileBase.__init__(self, path)
        self._scan()

    def _scan(self):
        """ evaluate the first SOURCE_LINES lines of the file """
        # `with` closes the file even if reading fails
        with open(self._path, "r") as f:
            # range(SOURCE_LINES), not range(1, SOURCE_LINES): the latter
            # stopped one line short of the documented limit.
            # readline() returns "" past EOF, so short files are fine.
            head = [f.readline() for _ in range(SOURCE_LINES)]

        txt = LicensedFileBase._sanitizeText(" ".join(head))
        self._process(txt)

class LicensedPNGFile(LicensedFileBase):
    """ class for PNG files """

    # first four bytes of the PNG signature
    _PNG_MAGIC = b"\x89PNG"

    def __init__(self, path):
        """ c'tor. Scans the file at path right away. """
        LicensedFileBase.__init__(self, path)
        self._scan()

    def _scan(self):
        """ take the text from the "Comment" section of texts
            stored in the PNG file

            Raises Exception if the file does not start with the PNG
            magic or ends in the middle of a chunk header.
        """
        texts = []
        # binary mode: the file is parsed byte-wise; `with` closes it
        # on every exit path, including the raises below
        with open(self._path, "rb") as f:
            # the signature is 8 bytes long, only the first 4 are checked
            signature = f.read(8)
            if signature[:4] != LicensedPNGFile._PNG_MAGIC:
                raise Exception("%s is not a PNG file" % self._path)

            while True:
                # first 32 bit field: length of chunk
                raw = f.read(4)
                if len(raw) < 4:
                    raise Exception("Corrupt file %s" % self._path)

                # unpack uint32_t (big endian)
                (length,) = struct.unpack(">L", raw)

                # last field: length==0
                # NOTE(review): any zero-length chunk ends the scan, not
                # only one of type IEND -- confirm this is intended.
                if length == 0:
                    break

                # type of chunk
                chunktype = f.read(4)
                if len(chunktype) < 4:
                    raise Exception("Corrupt file %s" % self._path)

                if chunktype == b"tEXt":
                    texts.append(
                        LicensedPNGFile._readtEXtChunk(f, length))
                    # skip crc32 at end of chunk
                    f.seek(4, os.SEEK_CUR)
                else:
                    # uninteresting, skip chunk data and its crc32
                    f.seek(length + 4, os.SEEK_CUR)

        self._process("".join(texts))

    @staticmethod
    def _readtEXtChunk(f, length):
        """ read length bytes of tEXt chunk data from f and return the
            value stored under the key "Comment", or "" if there is none.

            Raises Exception if f holds fewer than length bytes.
        """
        data = f.read(length)
        if len(data) < length:
            raise Exception("Truncated tEXt chunk")

        # Python 3 hands out bytes; the PNG specification defines tEXt
        # as Latin-1. Python 2 byte strings are left untouched.
        if not isinstance(data, str):
            data = data.decode("latin-1")

        # text chunks are in the form (key\0value)+ ; after splitting,
        # even fields are keys and odd fields the matching values
        fields = data.split("\0")
        comments = {}
        for i in range(0, len(fields), 2):
            if i + 1 < len(fields):
                comments[fields[i]] = fields[i + 1]
            else:
                # trailing key without a value
                comments[fields[i]] = ""

        # default: don't look at other sections
        return comments.get("Comment", "")

class FileRegistry:
    """ keep track of scanned files
    """

    # suffixes of files known to be binary (or handled elsewhere: .png)
    _NON_TEXT_SUFFIXES = (".o", ".a", ".rom", ".png")

    def __init__(self):
        # plain list with LicensedFileBase of all checked files
        self._files = []
        # dictionary key: file value: list of files that compare equal
        # to the key (same license shortcut and copyright holders)
        self._filedict = {}

    def checkFile(self, path):
        """ check/register file with given path

            Files that are neither text nor PNG, and files detected as
            generated, are not registered.
        """
        if FileRegistry.isTextFile(path):
            lf = LicensedTextFile(path)
        elif FileRegistry.isPNGFile(path):
            lf = LicensedPNGFile(path)
        else:
            #print "Warning: Not checking %s" % path
            return

        if lf.isGenerated():
            return

        self._files.append(lf)
        self._filedict.setdefault(lf, []).append(lf)

    def traverse(self):
        """ walk the tree below "." and check every file that is not
            blacklisted
        """
        for root, dirs, files in os.walk("."):
            # prune in place: os.walk only honours changes made to the
            # very list object it handed out
            dirs[:] = [
                d for d in dirs
                if d not in BLACKLISTED_DIRS
                and os.path.join(root, d) not in BLACKLISTED_PATHDIRS
            ]

            for name in files:
                if name in BLACKLISTED_FILES:
                    continue
                path = os.path.join(root, name)
                if path in BLACKLISTED_PATHFILES:
                    continue
                self.checkFile(path)

    def __str__(self):
        """ one paragraph per distinct license/holder combination: the
            paths of all files sharing it, then holders and license
        """
        out = []
        # FIXME need to find minimum/maximum year bounds
        # uses self: this used to read the module-level variable `fr`
        for key, members in sorted(self._filedict.items()):
            # Sort by path for reproducible output. Members of one group
            # all compare equal, so sorting the objects themselves did
            # nothing; a sorted copy also leaves the registry untouched.
            paths = sorted(m.getPath() for m in members)
            last = len(paths) - 1
            for idx, path in enumerate(paths):
                prefix = "* " if idx == 0 else "  "
                suffix = ":" if idx == last else ","
                out.append("%s%s%s\n" % (prefix, path, suffix))
            out.append("\n%s\n\n" % key)
        return "".join(out)

    def filterOutDefault(self, firstname, lastname, license):
        """ drop the group of files held solely by firstname lastname
            under the given license shortcut from the grouped view
        """
        # a dummy that compares equal to every file of that group
        probe = LicensedFileBase("dummy")
        probe._holders.append(
            CopyrightHolder(firstname, lastname, "", 0, 0))
        probe._shortcut = license

        self._filedict.pop(probe, None)

    @staticmethod
    def isTextFile(path):
        """ is path (presumably) a text file? Decided by suffix first,
            then by the absence of ELF magic bytes.
        """
        # quick filter for binary files.
        if path.endswith(FileRegistry._NON_TEXT_SUFFIXES):
            return False

        # tiny magic: check for ELF magic bytes
        with open(path, "rb") as f:
            magic = f.read(4)

        return magic != b"\x7fELF"

    @staticmethod
    def isPNGFile(path):
        """ is path a PNG file?
        """
        if not path.endswith(".png"):
            return False

        with open(path, "rb") as f:
            magic = f.read(4)

        return magic == b"\x89PNG"




if __name__ == '__main__':
      # Scan the tree below the current directory, drop the default case
      # (FAUmachine Team, GPL-2+) and print what remains.
      # The variable name `fr` is kept as is, in case other code in this
      # file refers to the module-level registry by that name.
      fr = FileRegistry()
      fr.traverse()
      fr.filterOutDefault("FAUmachine", "Team", "GPL-2+")
      # function-call form: same output on Python 2, and valid Python 3
      print(fr)

Generated by  Doxygen 1.6.0   Back to index