c4f026ee202bfec10c483903d693a3c33d4e90b4

Author: Robin Luckey

Date: 2009-05-13 09:37:37 -0700

[NEW] GitParser can parse a generic git log (that is, it doesn't require our custom formatter).

diff --git a/bin/ohlog b/bin/ohlog index d46b7a5..311530b 100755 --- a/bin/ohlog +++ b/bin/ohlog @@ -26,6 +26,7 @@ Ohloh source control log parser [option] can be one of the following: --cvs Parse a CVS rlog + --git Parse a Git log (generated with or without --name-status option) --svn Parse a Subversion log --svn-xml Parse a Subversion XML log --hg Parse a Mercurial log @@ -56,6 +57,10 @@ HELP parse CvsParser end + def git + parse GitParser + end + def svn parse SvnParser end @@ -101,6 +106,8 @@ HELP case option when '--cvs' self.subcommand = :cvs + when '--git' + self.subcommand = :git when '--svn' self.subcommand = :svn when '--svn-xml' diff --git a/lib/scm.rb b/lib/scm.rb index 8702a65..c89c648 100644 --- a/lib/scm.rb +++ b/lib/scm.rb @@ -24,6 +24,7 @@ require 'lib/scm/parsers/branch_number' require 'lib/scm/parsers/cvs_parser' require 'lib/scm/parsers/svn_parser' require 'lib/scm/parsers/svn_xml_parser' +require 'lib/scm/parsers/git_parser' require 'lib/scm/parsers/git_styled_parser' require 'lib/scm/parsers/hg_parser' require 'lib/scm/parsers/hg_styled_parser' diff --git a/lib/scm/parsers/git_parser.rb b/lib/scm/parsers/git_parser.rb new file mode 100644 index 0000000..14269e9 --- /dev/null +++ b/lib/scm/parsers/git_parser.rb @@ -0,0 +1,67 @@ +module Scm::Parsers + class GitParser < Parser + def self.scm + 'git' + end + + ANONYMOUS = "(no author)" unless defined?(ANONYMOUS) + + def self.internal_parse(io, opts) + e = nil + state = :key_values + + io.each do |line| + line.chomp! + + # Kind of a hack: the diffs section is not always present. + # Also, we don't know when the next commit is going to begin, + # so we may need to make an unexpected state change. 
+ if line =~ /^commit ([a-z0-9]{40,40})$/ + state = :key_values + elsif state == :message and line =~ /^[ADM]\s+(.+)$/ + state = :diffs + end + + if state == :key_values + case line + when /^commit ([a-z0-9]{40,40})$/ + sha1 = $1 + yield e if e + e = Scm::Commit.new + e.diffs = [] + e.token = sha1 + e.author_name = ANONYMOUS + when /^Author: (.+) <(.*)>$/ + # In the rare case that the Git repository does not contain any names (see OpenEmbedded for example) + # we use the email instead. + e.author_name = $1 || $2 + e.author_email = $2 + when /^Date: (.*)$/ + e.author_date = Time.parse($1).utc # Note strongly: MUST be RFC2822 format to parse properly + state = :message + end + + elsif state == :message + case line + when / (.*)/ + if e.message + e.message << "\n" << $1 + else + e.message = $1 + end + end + + elsif state == :diffs + if line =~ /^([ADM])\t(.+)$/ + e.diffs << Scm::Diff.new( :action => $1, :path => $2) + end + + else + raise RuntimeError("Unknown parser state #{state.to_s}") + end + end + + yield e if e + end + end +end diff --git a/test/unit/git_parser_test.rb b/test/unit/git_parser_test.rb new file mode 100644 index 0000000..68370a5 --- /dev/null +++ b/test/unit/git_parser_test.rb @@ -0,0 +1,59 @@ +require File.dirname(__FILE__) + '/../test_helper' + +module Scm::Parsers + class GitParserTest < Scm::Test + + def test_empty_array + assert_equal([], GitParser.parse('')) + end + + def test_log_parser_default +sample_log = <<SAMPLE +commit 1df547800dcd168e589bb9b26b4039bff3a7f7e4 +Author: Jason Allen <jason@ohloh.net> +Date: Fri Jul 14 16:07:15 2006 -0700 + + moving COPYING + +A COPYING + +commit 2e9366dd7a786fdb35f211fff1c8ea05c51968b1 +Author: Robin Luckey <robin@ohloh.net> +Date: Sun Jun 11 11:34:17 2006 -0700 + + added some documentation and licensing info + +M README +D helloworld.c +SAMPLE + + commits = GitParser.parse(sample_log) + + assert commits + assert_equal 2, commits.size + + assert_equal '1df547800dcd168e589bb9b26b4039bff3a7f7e4', 
commits[0].token + assert_equal 'Jason Allen', commits[0].author_name + assert_equal 'jason@ohloh.net', commits[0].author_email + assert_equal "moving COPYING", commits[0].message + assert_equal Time.utc(2006,7,14,23,7,15), commits[0].author_date + assert_equal 1, commits[0].diffs.size + + assert_equal "A", commits[0].diffs[0].action + assert_equal "COPYING", commits[0].diffs[0].path + + assert_equal '2e9366dd7a786fdb35f211fff1c8ea05c51968b1', commits[1].token + assert_equal 'Robin Luckey', commits[1].author_name + assert_equal 'robin@ohloh.net', commits[1].author_email + assert_equal "added some documentation and licensing info", commits[1].message # Note \n at end of comment + assert_equal Time.utc(2006,6,11,18,34,17), commits[1].author_date + assert_equal 2, commits[1].diffs.size + + assert_equal "M", commits[1].diffs[0].action + assert_equal "README", commits[1].diffs[0].path + assert_equal "D", commits[1].diffs[1].action + assert_equal "helloworld.c", commits[1].diffs[1].path + end + + end +end