#!/usr/bin/ruby
#
# flhistogram.rb - Focal Length Histogram v0.7 - 2007-10-15
#
# Short: flhistogram.rb calculates a histogram of the focal lengths used
#        in given photo files.
#
#
# Copyright (C) 2007 by Pedro Venda < pjvenda (at) pjvenda org >
#
# Distributed under the terms of the GNU Public License Agreement (GPLv2)
#   http://www.fsf.org/licensing/licenses/gpl.html or
#   http://www.fsf.org/licensing/licenses/gpl.txt
#
#
# Long: flhistogram.rb is a script that recursively finds image files
#       (presumably photographs), obtains the focal length used to shoot
#       each one of them and finally builds a histogram of the different
#       focal lengths found. In case you are wondering, the histogram is a
#       bar graph that has focal lengths on the X axis and the number of
#       pictures found on the Y axis.
#       Suppose the following example: 2 pictures shot at 18mm
#       and 4 shot at 35mm. The resulting histogram should be:
#
#         |
#        4|     XX
#         |     XX
#        2| XX  XX
#         | XX  XX
#         +------------>
#           18  35  (mm)
#
#       I wrote this to decide which lens to buy next. Having a library of
#       about 7000 pictures, the focal length histogram would show me the
#       focal ranges that I use more often. My next lens should be one that
#       performs well on my preferred focal lengths.
#
#
# Features
# ========
#
#   - Uses two alternative methods for retrieving metadata information
#     from picture files: mini_exiftool and exifr. Both are ruby gems
#     but there are two big differences between the methods: exifr is
#     much faster but does not handle all files.
#   - Focal length scaling to 35mm equivalent is enabled by default when
#     using mini_exiftool. The crop factor can be sent as an argument if
#     it is different from 1.0 (35mm sensor).
#
#
# Usage
# =====
#
#   This script was designed to run from the command line.
#   Use -h or --help for help.
#
#
# Changelog
# =========
#
#   v0.7 - 2007-10-15
#
#     - handles lack of rubygems module gracefully
#     - gpscript output is configurable via internal options hash
#     - added focal length multiplier fitting to 35mm equivalent sensors
#       and back to a FLM/crop factor - WORKS ONLY WITH mini_exiftool
#     - progress output is shown between main tasks if
#       verbose output is enabled
#     - handles command line parsing errors gracefully
#     - handles MiniExiftool errors gracefully
#     - produces gnuplot script with embedded data
#
#   v0.6 - 2007-10-01
#
#     - accept multiple path arguments from command line
#       * all path find results are concatenated
#       * duplicates are eliminated
#     - dependencies are now checked for existence before being used
#       * non-generic modules are loaded in separate functions
#
#   v0.5 - 2007-09-28
#
#     - initial version.
#
#
# TODO
# ====
#
#   TODO: produce image output directly through interaction with some
#         gnuplot-ruby library
#   TODO: include dependencies on help file
#   TODO: adapt help file according to available dependencies
#   TODO: differentiate histograms between camera models (where available)
#
#
# Some References / Credits
# =========================
#
#   idea about the histogram
#
#     http://tadek.pietraszek.org/blog/2007/03/27/pondering-about-a-prime-lens-for-my-camera-a-scientific-approach/
#
#   exiftool
#
#     http://www.sno.phy.queensu.ca/~phil/exiftool/
#
#   ruby exif interfaces
#
#     http://exifr.rubyforge.org/
#     http://miniexiftool.rubyforge.org/
#
#   ruby - gnuplot stuff:
#
#     http://rgplot.rubyforge.org/
#     http://rgnuplot.sourceforge.net/
#
#   more gnuplot stuff:
#
#     http://thetopher.com/2007/01/31/quick-and-easy-histograms-in-gnuplot/
#     http://www.binf.ku.dk/~kasper/scriptlist/histogram.html
#     http://gnuplot.sourceforge.net/demo_4.3/histograms.html
#     http://t16web.lanl.gov/Kawano/gnuplot/datafile2-e.html
#     http://nucl.sci.hokudai.ac.jp/~ohnishi/Lib/gnuplot.html
#     http://www.cs.waikato.ac.nz/~fracpete/programming/csv2gnuplot/
#
#     http://www.duke.edu/~hpgavin/gnuplot.html
#

require 'find'
begin
  # 'ftools' was removed from the standard library in Ruby 1.9 and nothing
  # below depends on it, so its absence must not abort the script.
  require 'ftools'
rescue LoadError
  nil
end
require 'optparse'

# Find files starting from search_path.
#
# search_path - file or directory to inspect
# exp         - when true, search_path is expanded to an absolute path first
# rec         - when true, sub-directories are traversed as well; when false
#               only the files directly inside search_path are listed
#
# Returns [search_path, list] where list is an array of file paths. When
# search_path is not a directory the list contains search_path itself.
def finder(search_path='.', exp=true, rec=true)
  search_path = File.expand_path(search_path) if exp

  list = []

  if FileTest.directory?(search_path)
    Find.find(search_path) do |path|
      if FileTest.directory?(path)
        # Find.find yields the search root first; pruning it would discard
        # every file, so only sub-directories are pruned, and only when
        # recursion is disabled.
        Find.prune if path != search_path && !rec
      else
        list << path
      end
    end
  else
    # given search path was not a directory. result is itself
    list << search_path
  end

  [search_path, list]
end

# Retrieve EXIF data using the 'exifr' method.
# Returns the EXIFR::JPEG object, or nil when the file cannot be read
# (best effort: any StandardError is treated as "no EXIF data").
def fetch_exif_exifr(file)
  EXIFR::JPEG.new(file)
rescue StandardError
  nil
end

# Retrieve EXIF data using the 'mini_exiftool' method.
# Returns the MiniExiftool object, or nil on a MiniExiftool::Error.
def fetch_exif_miniexiftool(file)
  MiniExiftool.new(file, :composite => true)
rescue MiniExiftool::Error
  nil
end

# Retrieve EXIF data from an image file with the given method
# ('exifr' or 'mini_exiftool').
# Returns [file, exif_info]; exif_info is nil for an unknown method or
# when extraction failed.
def fetch_exif(file, method)
  info = case method
         when 'exifr'         then fetch_exif_exifr(file)
         when 'mini_exiftool' then fetch_exif_miniexiftool(file)
         else nil
         end
  [file, info]
end

# Shared layout for the EXIF dumps below: prints the standard items, then
# one line per [tag, value] pair, then the name of the method used.
def print_exif_report(img_file, height, width, pairs, method_name)
  puts "Standard items".center(72)
  puts "=" * 72
  puts "File".rjust(30) + " : #{img_file}"
  puts "Height".rjust(30) + " : #{height}"
  puts "Width".rjust(30) + " : #{width}"
  puts ""
  puts "EXIF information".center(72)
  puts "=" * 72
  pairs.each do |tag, value|
    puts tag.to_s.rjust(30) + " : #{value}"
  end
  puts "-" * 72
  puts "method: #{method_name}".rjust(72)
end

# Print EXIF data obtained with the exifr method (debugging aid).
def print_exif_exifr(img_file, exif_info)
  unless exif_info && exif_info.exif?
    puts "No EXIF information in this image"
    return
  end
  print_exif_report(img_file, exif_info.height, exif_info.width,
                    exif_info.exif.to_hash.to_a, 'exifr')
end

# Print EXIF data obtained with the mini_exiftool method (debugging aid).
def print_exif_miniexiftool(img_file, exif_info)
  unless exif_info
    puts "No EXIF information in this image"
    return
  end
  pairs = exif_info.tags.map { |tag| [tag, exif_info[tag]] }
  print_exif_report(img_file, exif_info['ImageHeight'], exif_info['ImageWidth'],
                    pairs, 'mini_exiftool')
end

# Print EXIF data using the printer that matches the extraction method.
def print_exif(img_file, exif_info, method)
  case method
  when 'exifr'
    print_exif_exifr(img_file, exif_info)
  when 'mini_exiftool'
    print_exif_miniexiftool(img_file, exif_info)
  else
    puts "Unknown method to process image EXIF metadata"
  end
end

# Extract the focal length (integer) from exifr data.
# Returns nil when the image has no EXIF block or no :focal_length entry.
def extract_fl_exifr(exif)
  return nil unless exif.exif?
  tags = exif.exif.to_hash
  tags.has_key?(:focal_length) ? tags[:focal_length].to_i : nil
end

# Extract the focal length (integer) from mini_exiftool data and rescale it
# to the given focal length multiplier (crop factor) flm.
#
# When the 'ScaleFactor35efl' tag is present the value is scaled up to its
# 35mm equivalent and back down by flm; otherwise a 35mm sensor is assumed.
# flm must be greater than zero. Returns nil when there is no focal length.
def extract_fl_miniexiftool(exif, flm)
  return nil unless exif && exif['Focal-Length']

  # to_s: the tag may arrive as a number or as a string such as "18.0 mm"
  focal = exif['Focal-Length'].to_s.chomp("mm").to_f
  scale = exif['ScaleFactor35efl']
  focal *= scale.to_f if scale

  # round instead of truncating: multiplying and dividing floats can land
  # just below the true value, which truncation would turn into 1mm less
  (focal / flm).round
end

# Extract the focal length from exif data with the given method.
# Returns nil for an unknown method.
def extract_fl(exif, method, flm)
  case method
  when 'exifr'
    extract_fl_exifr(exif)
  when 'mini_exiftool'
    extract_fl_miniexiftool(exif, flm)
  else
    nil
  end
end

# Go through each file to be inspected, obtain the focal length of each one
# and store the results in a hash with the form:
#   :image_name => focal_length
# Files without EXIF data or without a focal length are left out.
def gather_fls(list, method, flm)
  result_hash = {}

  list.each do |file|
    name, exif = fetch_exif(file, method)
    next unless exif

    #print_exif(file, exif, method)    # DEBUG stuff
    focal_length = extract_fl(exif, method, flm)
    result_hash[name.to_s.intern] = focal_length.to_i if focal_length
  end

  result_hash
end

# Calculate the histogram of focal lengths.
# Takes a hash of ':image_name => focal_length' pairs and returns a new hash
# of 'focal_length => number_of_images_with_that_focal_length' pairs.
def calculate_histogram(fls)
  fl_hash = {}
  fls.each_value do |focal_length|
    fl_hash[focal_length] = (fl_hash[focal_length] || 0) + 1
  end
  fl_hash
end

# Write the histogram in the requested format ('csv', 'gpdata', 'gpscript').
#
# fl_hash - histogram as returned by calculate_histogram
# format  - output format name
# options - graph options hash used by the 'gpscript' format
# out     - IO-like object that receives the output (defaults to STDOUT)
def output_histogram(fl_hash, format, options, out=STDOUT)
  case format
  when 'csv'
    out.puts output_histogram_csv(fl_hash)
  when 'gpdata'
    out.puts output_histogram_gpdata(fl_hash)
  when 'gpscript'
    out.puts output_histogram_gpscript(fl_hash, options)
  when 'png'
    # TODO: generate PNG image directly. use output flag
  else
    out.puts "Unknown format for output"
  end
end

# Generate output values in CSV format, one "focal_length,count" line per
# focal length, sorted by focal length.
def output_histogram_csv(fl_hash)
  fl_hash.sort.map { |focal_length, count| "#{focal_length},#{count}\n" }.join
end

# Generate output histogram values in gnuplot data format
# (like CSV but values are separated by spaces instead of ',').
def output_histogram_gpdata(fl_hash)
  fl_hash.sort.map { |focal_length, count| "#{focal_length} #{count}\n" }.join
end

# Generate a gnuplot script with the histogram data embedded.
#
# options keys read: :boxwidth, :xlabel, :ylabel, :format ('png', 'jpg' or
# 'svg'), :width, :height and :filename.
# An empty histogram yields a script with minimal, non-empty axis ranges.
def output_histogram_gpscript(fl_hash, options)
  # axis limits with some margin around the data; fall back to 0 when the
  # histogram is empty and keep both ranges non-empty
  y_high = [((fl_hash.values.max || 0) * 1.15).ceil, 1].max
  x_low  = ((fl_hash.keys.min || 0) * 0.85).floor
  x_high = ((fl_hash.keys.max || 0) * 1.15).ceil
  x_high = x_low + 1 if x_high <= x_low

  format = options[:format]
  # gnuplot names its JPEG terminal 'jpeg'; the file extension stays 'jpg'
  terminal = (format == 'jpg') ? 'jpeg' : format
  terminal_flags = case format
                   when 'png', 'jpg' then ' enhanced'
                   when 'svg'        then ' dynamic enhanced'
                   else ''
                   end

  lines = []
  lines << "#"
  lines << "# bar box width. for values > 1, bars overlap each other."
  lines << "# for values < 1, gaps appear between bars"
  lines << "set boxwidth #{options[:boxwidth]}"
  lines << "# scales"
  lines << "set noautoscale"
  lines << "set yrange [0:#{y_high}]"
  lines << "set xrange [#{x_low}:#{x_high}]"
  lines << "# X and Y axis labels"
  lines << "set xlabel \"#{options[:xlabel]}\""
  lines << "set ylabel \"#{options[:ylabel]}\""
  lines << "set style fill solid 0.33 border"
  lines << "# output type"
  lines << "set terminal #{terminal} size #{options[:width]},#{options[:height]}#{terminal_flags}"
  lines << "# output file"
  lines << "set output '#{options[:filename]}.#{format}'"
  lines << "set key off"
  lines << "# plot graph with included data"
  lines << "plot [#{x_low}:#{x_high}] '-' with boxes"

  lines.join("\n") + "\n" + output_histogram_gpdata(fl_hash) + "end\n"
end

# Wrapper that does 'require' calls with error handling.
# Returns true when the module was loaded, false on LoadError.
def load_module(module_name)
  require module_name
  true
rescue LoadError
  false
end

# Define and parse the command line options found in argv.
#
# Recognised options are removed from argv in place. Prints the help text
# and exits on -h/--help; prints the error message and exits on a parse
# error.
#
# Returns [options, argv]: the options hash and the remaining (non-option)
# arguments.
def parse_cmdline_options(argv)
  options = {}

  parser = OptionParser.new do |o|
    o.banner = "Usage: flhistogram.rb [options]"

    o.on_head('-m METHOD', [:exifr, :mini_exiftool], "Use 'exifr' or 'mini_exiftool' method of accessing EXIF data. 'exifr' is amazingly fast but does not support focal length conversions (between different sensor areas). Although it is not default, I would advise using 'mini_exiftool'.") do |m|
      options[:method] = m.to_s
    end

    o.on('-f', '--format FORMAT', [:csv, :gpdata, :gpscript, :png], "Output format. 'csv' for CSV data, 'gpdata' for gnuplot data format, 'gpscript' for a gnuplot script (with data points) that builds the histogram or 'png' for a PNG image with the histogram graph.") do |f|
      options[:format] = f.to_s
    end

    o.on('-c', '--cfactor RATIO', "Focal length multiplier against 35mm equivalent.") do |c|
      options[:flm] = c.to_f
    end

    o.on('-o', '--output FILE', "Output file. Use '-' or omit this option to output to stdout.") do |file|
      options[:output] = file.to_s
    end

    o.on('-v', '--verbose', "Verbose mode") do |v|
      options[:verbose] = v
    end

    o.on('-r', '--recursive', "Recursively look for files") do |r|
      options[:recursive] = r
    end

    o.on_tail('-h', '--help', "Show this help text") do
      puts o.help
      exit
    end
  end

  begin
    # parse the array we were given, not the global ARGV
    parser.parse!(argv)
  rescue OptionParser::ParseError => e
    puts e.message
    exit
  end

  [options, argv]
end

# Verbose puts: writes str to options[:output] only when options[:verbose]
# is set. With nlf (no line feed) true, no newline is appended so that a
# following message continues on the same line.
def vputs(str, options, nlf=false)
  return unless options[:verbose]

  if nlf
    options[:output].print str.to_s
  else
    options[:output].puts str.to_s
  end
end

# Turn the value of the -o option into an IO object: STDOUT when target is
# nil or '-', a file opened for writing when it is a file name, and the
# object itself otherwise.
def open_output(target)
  return STDOUT if target.nil? || target == '-'
  target.is_a?(String) ? File.open(target, 'w') : target
end

# Script entry point: parses argv, collects the files, gathers the focal
# lengths and writes the histogram. Problems that prevent the run are
# reported on stdout and end the run early.
def flhistogram_main(argv)
  unless load_module('rubygems')
    puts "unable to load module 'rubygems'."
    return
  end

  options, paths = parse_cmdline_options(argv)

  # configure histogram graph output
  options[:graph] = {
    :boxwidth => 0.90,
    :linewidth => 1.0,
    :xlabel => "Focal Length (mm)",
    :ylabel => "Count",
    :filename => 'histogram',
    :format => 'png',    # png or jpg or svg
    :width => 800,
    :height => 600,
  }

  # verify necessary configuration variables and sensible values
  options[:method] = 'exifr' unless options.has_key?(:method)
  options[:recursive] = false unless options.has_key?(:recursive)
  options[:format] = 'csv' unless options.has_key?(:format)

  # the crop factor is used as a divisor, so it has to be positive
  if options.has_key?(:flm) && options[:flm] <= 0
    puts "invalid crop factor: it must be a number greater than zero."
    return
  end

  begin
    options[:output] = open_output(options[:output])
  rescue SystemCallError => e
    puts "unable to open output file: #{e.message}"
    return
  end
  out = options[:output]

  begin
    if options[:method] == 'mini_exiftool'
      unless load_module('mini_exiftool')
        puts "unable to load module 'mini_exiftool'."
        return
      end
    else
      options[:method] = 'exifr'
      unless load_module('exifr')
        puts "unable to load module 'exifr'."
        return
      end
      if options.has_key?(:flm)
        out.puts "# WARNING: method 'exifr' is not able to retrieve the focal length multiplier factor,"
        out.puts "# so crop factor fitting will not be made."
      end
    end

    # transform focal lengths to 35mm equivalent by default
    options[:flm] = 1.0 unless options.has_key?(:flm)

    # if mini_exiftool was chosen as a method, add the crop factor value
    # to the X-axis graph label
    if options[:method] == 'mini_exiftool'
      options[:graph][:xlabel] += " [crop factor=#{options[:flm]}]"
    end

    # no parameters? work on '.'; otherwise accept multiple paths
    source_paths = paths.empty? ? ['.'] : paths

    # output some verbose configuration data if requested
    vputs("# - configuration/run info:", options)
    vputs("# search path(s): '#{source_paths.join("' , '")}'", options)
    vputs("# recursive find mode: #{options[:recursive]}", options)
    vputs("# method: #{options[:method]}", options)
    if options[:method] == 'mini_exiftool'
      vputs("# sensor focal length multiplier (crop factor) against 35mm equivalent: #{options[:flm]}", options)
      if options[:flm] == 1.6
        vputs("# (correct for most Canon digital SLRs)", options)
      elsif options[:flm] == 1.5
        vputs("# (correct for most Nikon digital SLRs)", options)
      elsif options[:flm] == 1.0
        vputs("# (sensor is equivalent to 35mm)", options)
      end
    end
    vputs("# output format: #{options[:format]}", options)

    vputs("# - obtaining list of files to inspect... ", options, true)
    out.flush

    # get list of files to inspect, all search paths concatenated
    list = []
    source_paths.each do |spath|
      list += finder(spath.to_s, true, options[:recursive])[1]
    end

    # eliminate duplicates
    list.uniq!

    # set file counter (total files to consider)
    total_files_found = list.length

    vputs("done", options)
    vputs("# files found: #{total_files_found}", options)
    vputs("# - fetching focal lengths from every file... ", options, true)
    out.flush

    # build a hash of file names and focal_lengths
    fls = gather_fls(list, options[:method], options[:flm])

    # set image counter (accounted images with focal length)
    images_found = fls.length

    vputs("done", options)
    vputs("# accounted images with valid EXIF data and including focal-length: #{images_found}", options)
    vputs("# - calculating histogram values... ", options, true)
    out.flush

    # compute histogram
    fl_histogram = calculate_histogram(fls)

    vputs("done", options)
    vputs("# - generating output:", options)
    vputs("#", options)
    vputs("#", options)
    out.flush

    # output results
    output_histogram(fl_histogram, options[:format], options[:graph], out)

    vputs("# - all done", options)
    out.flush
  ensure
    # close the output file, but never the process-wide STDOUT
    out.close unless out.equal?(STDOUT)
  end
end

# see if we were called directly
if $0 == __FILE__
  flhistogram_main(ARGV)
end