#! /usr/bin/env ruby

Main {
  # Home directory of the current user: $HOME, else %USERPROFILE%, else
  # whatever "~" expands to.
  Home = File.expand_path(ENV["HOME"] || ENV["USERPROFILE"] || "~")
  # Default root directory under which downloads are mirrored.
  Basedir = File.join(Home, "mp3")

  description <<-txt
    mp3scrape will scour any url for its mp3 content - the script mirrors,
    never downloading the same file twice
  txt

  example <<-txt
    1) get a bunch of xmas tunes
      mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html
  txt

  # The page whose content is scanned for mp3 links.
  argument("uri"){
    description "the uri to scrape"
    cast :uri
  }

  # Regexp source (compiled in #run) used to pull mp3 urls out of the page.
  # The first capture group is the url.  The default matches a quoted
  # http:// or https:// url ending in a literal ".mp3" - the dot is escaped
  # so that e.g. "foo-mp3" is not mistaken for an mp3 link.
  option("pattern", "p"){
    description "specify the mp3 pattern"
    argument_required
    default %|['"](https?://[^\\s]+[^/\\s]+\\.mp3)["']|
  }

  # Root directory for downloads whose path is derived from the url.
  option("basedir", "b"){
    description "specify the base download dir - default(#{ Basedir })"
    argument_required
    default Basedir
  }

  # Optional flat directory; it is passed to destinations_for, which then
  # places every file directly inside it.
  option("destination", "d"){
    description "specify the absolute download dir - default(#{ File.join Basedir, 'auto-based-on-uri' })"
    argument_required
  }

  # Print the matched urls and exit without downloading anything.
  option("list"){
    description "only list the mp3s that would be scraped"
  }

  # Print the "src -> dst" plan instead of performing the downloads.
  option("noop", "n"){
    description "show the downloads that would be performed"
  }

  # Entry point: fetches the page at the "uri" argument, extracts every mp3
  # url matching the "pattern" option and then either lists them (--list),
  # prints the download plan (--noop) or mirrors them to disk.
  def run
    uri = param["uri"].value
    pattern = %r/#{ param["pattern"].value }/

    # URI.open (from open-uri) is used rather than Kernel#open: since
    # Ruby 3.0 Kernel#open no longer opens urls, and it would run a command
    # for a string starting with "|".  The block form also closes the
    # handle once the page has been read.
    page = URI.open(uri.to_s, &:read)

    # uniq: a url linked several times on the page is only handled once.
    srcs = page.scan(pattern).flatten.compact.uniq

    if param["list"].given?
      puts srcs
      exit
    end

    dsts = destinations_for srcs, param["destination"].value
    spec = srcs.zip dsts

    if param["noop"].given?
      spec.each { |src, dst| puts "#{ src } -> #{ dst }" }
    else
      mirror spec
    end
  end

  # Downloads every source url to its destination path, leaving alone any
  # local file that is at least as new as the remote Last-Modified time.
  #
  # spec - Array of [src, dst] pairs; src is a url string, dst a local path.
  #
  # Prints "src -> dst" for a file that is written and "src == dst" for one
  # that is already up to date.
  def mirror(spec)
    spec.each do |src, dst|
      FileUtils.mkdir_p(File.dirname(dst))

      # A destination that does not exist yet counts as infinitely old.
      mtime = File.exist?(dst) ? File.mtime(dst) : Time.at(0)

      # URI.open rather than Kernel#open: the latter stopped opening urls in
      # Ruby 3.0 and would execute a command for a string starting with "|".
      # NOTE(review): open-uri appears to fetch the whole body before
      # yielding, so the freshness check below saves the disk write rather
      # than the transfer - confirm.
      URI.open(src) do |remote|
        print src
        begin
          # nil when the server sent no Last-Modified header; in that case
          # freshness cannot be judged, so the file is always (re)written.
          last_modified = remote.last_modified

          if last_modified && last_modified <= mtime
            print " == "
            next
          end

          print " -> "
          data = remote.read
          File.open(dst, "wb") { |local| local.write(data) }

          # Stamp the local copy with the remote time so the comparison
          # above works on the next run.
          File.utime(last_modified, last_modified, dst) if last_modified
        ensure
          puts dst
        end
      end
    end
  end

  # Maps each source url to the absolute local path it should be saved at.
  #
  # srcs        - Array of url strings.
  # destination - optional directory; when given, every file is placed
  #               directly inside it under its cleaned basename.
  # basedir     - optional root for url-derived paths; when nil it falls
  #               back to the value of the "basedir" option, so that
  #               --basedir is honoured instead of always using Basedir.
  #
  # Without a destination the path is basedir/<host>/<cleaned url path>.
  #
  # Returns an Array of expanded path strings, in the same order as srcs.
  def destinations_for(srcs, destination = nil, basedir = nil)
    srcs.map do |src|
      path =
        if destination
          File.join(destination, clean(File.basename(src)))
        else
          # Only resolved when actually needed.
          basedir ||= param["basedir"].value
          uri = URI.parse(src.to_s)
          segments = uri.path.split("/").map { |segment| clean(segment) }
          # The leading empty segment (from the path's initial "/") yields a
          # doubled separator, which File.expand_path collapses below.
          [basedir, uri.host, *segments].compact.join(File::SEPARATOR)
        end
      File.expand_path(path)
    end
  end

  # Turns a (possibly url-encoded) path component into a tame file name:
  # the input is url-decoded, every character outside 0-9 a-z A-Z _ @ ) ( ~ . -
  # becomes an underscore, and runs of underscores collapse into one.
  # nil is treated as the empty string.
  def clean(basename)
    decoded = CGI.unescape(basename.to_s)
    sanitized = decoded.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_')
    sanitized.squeeze('_')
  end
}

# Ruby runs BEGIN blocks before the rest of the file, so the libraries
# required here are already loaded when the Main { ... } block at the top of
# the file is evaluated.
BEGIN {
  require "yaml"
  require "uri"
  require "open-uri"
  require "fileutils"
  require "cgi"
  # rubygems is optional: a LoadError is deliberately ignored (the 42 is just
  # a placeholder value for the rescue body).
  begin
    require "rubygems"
  rescue LoadError
    42
  end
  # The "main" gem supplies the Main DSL used above; without it, print an
  # install hint on stderr and exit with status 1.
  begin
    require "main"
  rescue LoadError
    STDERR.puts "gem install main"
    exit 1
  end
  # Unbuffered output, so progress printed with print/puts appears immediately.
  STDERR.sync = STDOUT.sync = true
  # On Ctrl-C, exit via Kernel#exit instead of the default interrupt handling.
  trap("INT"){ exit }
}