aboutsummaryrefslogtreecommitdiffstats
path: root/app/services/pdf_processor_service.rb
blob: 3f97611bfd9d84b755b40a231cd61b717feb5b8d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
require 'tmpdir'

# Renders each page of the PDFs attached to a resource as a reduced-size
# image and attaches the result to the resource's +pdf_snapshots+.
#
# The resource is expected to respond to +pdfs+ (enumerable of attachments
# exposing +key+), +pdf_snapshots+ (responding to +attach+) and +name+.
#
# @example
#   PdfProcessorService.new(document, quality: 60).process_pdfs
class PdfProcessorService
  # Defaults applied when the caller does not override an option.
  DEFAULT_OPTIONS = {
    quality: 40,
    resize_percentage: 50,
    format: 'jpg'
  }.freeze

  # File extensions whose MIME subtype differs from the extension itself.
  MIME_SUBTYPES = { 'jpg' => 'jpeg' }.freeze
  private_constant :MIME_SUBTYPES

  # @param resource [Object] record owning the +pdfs+ and +pdf_snapshots+ attachments
  # @param options [Hash, nil] overrides for {DEFAULT_OPTIONS}; string keys are
  #   accepted and converted to symbols
  def initialize(resource, options = {})
    @resource = resource
    @options = DEFAULT_OPTIONS.merge(options.to_h.transform_keys(&:to_sym))
  end

  # Processes every PDF attached to the resource, page by page.
  #
  # @return [Object] whatever +@resource.pdfs.each+ returns
  # @raise [StandardError] re-raises the first failure encountered
  def process_pdfs
    @resource.pdfs.each do |pdf|
      pdf_path = get_blob_path(pdf)
      process_single_pdf(pdf_path)
    end
  end

  private

  # Resolves the on-disk path of an attachment through the storage service.
  #
  # NOTE(review): +path_for+ is invoked via +send+ on the globally configured
  # service; this presumably only works with the Disk service - confirm
  # before using a remote storage backend.
  def get_blob_path(pdf)
    ActiveStorage::Blob.service.send(:path_for, pdf.key)
  end

  # Converts and attaches every page of the PDF at +pdf_path+.
  # The first page that fails is logged and its error re-raised.
  def process_single_pdf(pdf_path)
    page_count = get_page_count(pdf_path)

    (0...page_count).each do |index|
      Rails.logger.info "Processing page #{index + 1} of #{page_count} from #{pdf_path}"
      process_page(pdf_path, index)
    rescue StandardError => e
      Rails.logger.error "Failed to process page #{index + 1}: #{e.message}"
      raise
    end
  end

  # Determines the number of pages, asking ImageMagick's +identify+ first and
  # falling back to Ghostscript only when that yields nothing usable.
  #
  # @return [Integer] a positive page count
  # @raise [RuntimeError] when neither tool reports a positive count
  def get_page_count(pdf_path)
    log_pdf_diagnostics(pdf_path)

    # "%n" is printed once per page without a separator ("333" for three
    # pages), so a newline is appended and only the first line is parsed.
    identify_output = run_command('identify', '-format', '%n\n', pdf_path)
    Rails.logger.debug { "Identify output: #{identify_output}" }
    count = parse_page_count(identify_output)

    unless count.positive?
      # NOTE(review): Ghostscript 9.50+ enables -dSAFER by default and may
      # refuse to read the file without --permit-file-read; confirm against
      # the deployed version.
      program = "#{postscript_string(pdf_path)} (r) file runpdfbegin pdfpagecount = quit"
      gs_output = run_command('gs', '-q', '-dNODISPLAY', '-c', program)
      Rails.logger.debug { "Ghostscript output: #{gs_output}" }
      count = parse_page_count(gs_output)
    end

    Rails.logger.debug { "Final page count: #{count}" }
    raise "Invalid page count: #{count}" unless count.positive?

    count
  rescue StandardError => e
    Rails.logger.error "Failed to get page count: #{e.message}"
    raise
  end

  # Emits file diagnostics. Block-form logging keeps the +file+ subprocess
  # from being spawned unless debug output is actually enabled.
  def log_pdf_diagnostics(pdf_path)
    Rails.logger.debug { "Checking PDF: #{pdf_path}" }
    Rails.logger.debug { "File exists: #{File.exist?(pdf_path)}" }
    Rails.logger.debug { "File size: #{File.exist?(pdf_path) ? File.size(pdf_path) : 'n/a'}" }
    Rails.logger.debug { "File type: #{run_command('file', '-b', pdf_path)}" }
  end

  # Runs an external command without a shell, so paths containing spaces or
  # shell metacharacters are passed through verbatim.
  #
  # @return [String] combined stdout and stderr, or "" when the command
  #   could not be started
  def run_command(*argv)
    IO.popen(argv, err: %i[child out], &:read)
  rescue SystemCallError => e
    Rails.logger.debug { "Could not run #{argv.first}: #{e.message}" }
    ''
  end

  # Extracts the first line consisting solely of digits, which skips any
  # warnings the tool interleaved on stderr.
  #
  # @return [Integer] the parsed count, or 0 when no such line exists
  def parse_page_count(output)
    output.to_s[/^\s*(\d+)\s*$/, 1].to_i
  end

  # Wraps +text+ as a PostScript string literal, escaping the characters
  # that are special inside one (backslash and parentheses).
  def postscript_string(text)
    "(#{text.gsub(/[\\()]/) { |char| "\\#{char}" }})"
  end

  # Renders one page and attaches its reduced image. All intermediate files
  # live in a private temporary directory that is removed afterwards, even
  # when conversion or attachment raises.
  def process_page(pdf_path, page_index)
    Dir.mktmpdir('pdf-processor') do |work_dir|
      temp_files = create_page_images(pdf_path, page_index, work_dir)
      attach_processed_image(temp_files[:reduced], page_index)
    end
  end

  # Produces the full-size and reduced images for a page inside +work_dir+.
  #
  # @return [Hash{Symbol => String}] paths under +:original+ and +:reduced+
  def create_page_images(pdf_path, index, work_dir)
    original_path = File.join(work_dir, "page-#{index + 1}.#{@options[:format]}")
    reduced_path = File.join(work_dir, "reduced-page-#{index + 1}.#{@options[:format]}")

    create_original_image(pdf_path, index, original_path)
    create_reduced_image(original_path, reduced_path)

    { original: original_path, reduced: reduced_path }
  end

  # Rasterises page +index+ (zero-based) of the PDF into +output_path+.
  def create_original_image(pdf_path, index, output_path)
    MiniMagick::Tool::Convert.new do |convert|
      convert << "#{pdf_path}[#{index}]"
      convert << output_path
    end
  rescue StandardError => e
    Rails.logger.error "Failed to create original image for page #{index + 1} from #{pdf_path}: #{e.message}"
    raise
  end

  # Writes a stripped, recompressed and resized copy of +input_path+ using
  # the +:quality+ and +:resize_percentage+ options.
  def create_reduced_image(input_path, output_path)
    MiniMagick::Tool::Convert.new do |convert|
      convert << input_path
      convert << "-quality"
      convert << @options[:quality].to_s
      convert.strip
      convert << "-resize"
      convert << "#{@options[:resize_percentage]}%"
      convert << output_path
    end
  rescue StandardError => e
    Rails.logger.error "Failed to create reduced image from #{input_path}: #{e.message}"
    raise
  end

  # Uploads the image and attaches it to the resource's +pdf_snapshots+.
  # A missing image is skipped, but the skip is logged rather than silent.
  def attach_processed_image(image_path, index)
    unless File.exist?(image_path)
      Rails.logger.warn "Skipping page #{index + 1}: #{image_path} was not created"
      return
    end

    filename = generate_filename(index)
    image_blob = create_blob(image_path, filename)
    @resource.pdf_snapshots.attach(image_blob)
  end

  # Builds the snapshot filename from the resource name (trimmed, lowercased,
  # whitespace replaced by underscores) and the one-based page number.
  def generate_filename(index)
    base_name = @resource.name.strip.downcase.gsub(/\s/, "_")
    "#{base_name}_page-#{index + 1}.#{@options[:format]}"
  end

  # Uploads the file at +path+ as a new blob. The block form of +File.open+
  # guarantees the handle is closed once the upload call returns.
  def create_blob(path, filename)
    File.open(path, 'rb') do |file|
      ActiveStorage::Blob.create_and_upload!(
        io: file,
        filename: filename,
        content_type: content_type
      )
    end
  end

  # MIME type for the configured output format ("jpg" maps to "image/jpeg").
  def content_type
    extension = @options[:format].to_s.downcase
    "image/#{MIME_SUBTYPES.fetch(extension, extension)}"
  end
end