1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
require 'open3'
require 'tmpdir'

# Renders every page of a resource's attached PDFs to a reduced-size image and
# attaches each image to the resource's +pdf_snapshots+.
#
# The resource is expected to respond to +pdfs+ (each item exposing +key+),
# +pdf_snapshots+ (responding to +attach+) and +name+.
#
# Options (merged over DEFAULT_OPTIONS):
#   :quality           - value passed to ImageMagick's -quality
#   :resize_percentage - percentage passed to ImageMagick's -resize
#   :format            - output file extension / image format
class PdfProcessorService
  DEFAULT_OPTIONS = {
    quality: 40,
    resize_percentage: 50,
    format: 'jpg'
  }.freeze

  # File extensions whose MIME subtype differs from the extension itself.
  CONTENT_TYPE_OVERRIDES = {
    'jpg' => 'image/jpeg',
    'tif' => 'image/tiff'
  }.freeze

  # @param resource [Object] record owning the PDFs and the snapshots
  # @param options [Hash] overrides for DEFAULT_OPTIONS
  def initialize(resource, options = {})
    @resource = resource
    @options = DEFAULT_OPTIONS.merge(options)
  end

  # Processes every PDF attached to the resource, page by page.
  # Any failure is logged and re-raised.
  def process_pdfs
    @resource.pdfs.each do |pdf|
      pdf_path = get_blob_path(pdf)
      process_single_pdf(pdf_path)
    end
  end

  private

  # Resolves the on-disk path of an attachment through the storage service's
  # +path_for+.
  # NOTE(review): this presumably only works with a disk-backed Active Storage
  # service; other services are not expected to implement +path_for+ - confirm.
  def get_blob_path(pdf)
    ActiveStorage::Blob.service.send(:path_for, pdf.key)
  end

  # Converts and attaches each page of the PDF at +pdf_path+ in order.
  def process_single_pdf(pdf_path)
    page_count = get_page_count(pdf_path)
    page_count.times do |index|
      Rails.logger.info "Processing page #{index + 1} of #{page_count} from #{pdf_path}"
      process_page(pdf_path, index)
    rescue StandardError => e
      Rails.logger.error "Failed to process page #{index + 1}: #{e.message}"
      raise
    end
  end

  # Determines the number of pages, asking ImageMagick first and falling back
  # to Ghostscript only when that yields nothing usable.
  #
  # @return [Integer] a positive page count
  # @raise [RuntimeError] when neither tool reports a positive count
  def get_page_count(pdf_path)
    Rails.logger.debug "Checking PDF: #{pdf_path}"
    Rails.logger.debug "File exists: #{File.exist?(pdf_path)}"
    Rails.logger.debug "File size: #{File.size(pdf_path)}"
    # Block form: the external `file` call only runs when debug logging is on.
    Rails.logger.debug { "File type: #{command_output('file', '-b', pdf_path)}" }

    # `%n` is emitted once per page, so without the newline a 3-page document
    # would print "333"; with it, the first line alone carries the count.
    identify_output = command_output('identify', '-format', "%n\n", pdf_path)
    Rails.logger.debug "Identify output: #{identify_output}"
    count = leading_integer(identify_output)

    unless count.positive?
      gs_output = ghostscript_page_count_output(pdf_path)
      Rails.logger.debug "Ghostscript output: #{gs_output}"
      count = leading_integer(gs_output)
    end

    Rails.logger.debug "Final page count: #{count}"
    raise "Invalid page count: #{count}" unless count.positive?

    count
  rescue StandardError => e
    Rails.logger.error "Failed to get page count: #{e.message}"
    raise
  end

  # Asks Ghostscript for the page count. The path is handed over with -s so it
  # never has to be escaped into PostScript source, and read access is granted
  # explicitly because recent Ghostscript releases run sandboxed by default.
  def ghostscript_page_count_output(pdf_path)
    command_output(
      'gs', '-q', '-dNODISPLAY',
      "--permit-file-read=#{pdf_path}",
      "-sInputPdf=#{pdf_path}",
      '-c', 'InputPdf (r) file runpdfbegin pdfpagecount = quit'
    )
  end

  # Runs an external command without going through a shell (arguments are
  # passed as an argv array, so paths need no quoting) and returns its stdout.
  # A missing or non-executable binary yields an empty string so that the
  # caller can fall back to another tool.
  def command_output(*command)
    stdout, stderr, status = Open3.capture3(*command)
    unless status.success?
      Rails.logger.debug "#{command.first} exited with status #{status.exitstatus}: #{stderr.strip}"
    end
    stdout
  rescue SystemCallError => e
    Rails.logger.debug "Could not run #{command.first}: #{e.message}"
    ''
  end

  # Returns the integer at the start of +text+, or 0 when there is none.
  def leading_integer(text)
    text.to_s[/\A\s*(\d+)/, 1].to_i
  end

  # Renders one page and attaches the reduced image. All intermediate files
  # live in a private temporary directory that is removed afterwards, whether
  # or not an error occurred, so concurrent runs cannot clobber each other.
  def process_page(pdf_path, page_index)
    Dir.mktmpdir('pdf-page-') do |dir|
      temp_files = create_page_images(pdf_path, page_index, dir)
      attach_processed_image(temp_files[:reduced], page_index)
    end
  end

  # Produces the full-size and the reduced image for one page inside +dir+.
  #
  # @return [Hash{Symbol => String}] paths under :original and :reduced
  def create_page_images(pdf_path, index, dir = Dir.pwd)
    extension = @options[:format]
    original_path = File.join(dir, "page-#{index + 1}.#{extension}")
    reduced_path = File.join(dir, "reduced-page-#{index + 1}.#{extension}")

    create_original_image(pdf_path, index, original_path)
    create_reduced_image(original_path, reduced_path)

    { original: original_path, reduced: reduced_path }
  end

  # Renders page +index+ (zero-based) of the PDF to +output_path+.
  def create_original_image(pdf_path, index, output_path)
    MiniMagick::Tool::Convert.new do |convert|
      convert << "#{pdf_path}[#{index}]"
      convert << output_path
    end
  rescue StandardError => e
    Rails.logger.error "Failed to create original image for page #{index + 1} from #{pdf_path}: #{e.message}"
    raise
  end

  # Writes a stripped, recompressed and resized copy of +input_path+.
  def create_reduced_image(input_path, output_path)
    MiniMagick::Tool::Convert.new do |convert|
      convert << input_path
      convert << "-quality"
      convert << @options[:quality].to_s
      convert.strip
      convert << "-resize"
      convert << "#{@options[:resize_percentage]}%"
      convert << output_path
    end
  rescue StandardError => e
    Rails.logger.error "Failed to create reduced image from #{input_path}: #{e.message}"
    raise
  end

  # Uploads the image at +image_path+ and attaches it to the resource's
  # snapshots. A missing file is skipped (with a warning) rather than raised.
  def attach_processed_image(image_path, index)
    unless File.exist?(image_path)
      Rails.logger.warn "No image produced for page #{index + 1}; skipping attachment"
      return
    end

    filename = generate_filename(index)
    image_blob = create_blob(image_path, filename)
    @resource.pdf_snapshots.attach(image_blob)
  end

  # Builds the snapshot filename from the resource name: trimmed, lower-cased,
  # whitespace replaced by underscores, suffixed with the one-based page number.
  def generate_filename(index)
    base_name = @resource.name.to_s.strip.downcase.gsub(/\s/, "_")
    "#{base_name}_page-#{index + 1}.#{@options[:format]}"
  end

  # Uploads the file as a new blob. The block form guarantees the file handle
  # is closed even when the upload raises.
  def create_blob(path, filename)
    File.open(path, 'rb') do |io|
      ActiveStorage::Blob.create_and_upload!(
        io: io,
        filename: filename,
        content_type: image_content_type
      )
    end
  end

  # MIME type for the configured format ("jpg" is not a valid subtype).
  def image_content_type
    extension = @options[:format].to_s.downcase
    CONTENT_TYPE_OVERRIDES.fetch(extension, "image/#{extension}")
  end
end
|