211 lines
8.7 KiB
Python
211 lines
8.7 KiB
Python
import os
|
|
from pathlib import Path
|
|
from pdf2image import convert_from_path
|
|
from PIL import Image
|
|
import json
|
|
def convert_pdfs_to_png(input_dir, output_dir=None, dpi=300, thread_count=4):
    """Convert every PDF in a directory to PNG images, one PNG per page.

    A PDF that renders to a single page is saved as ``<stem>.png``; a PDF
    with several pages is saved as ``<stem>_page_<n>.png`` with ``n``
    starting at 1.  A failure while processing one PDF is reported on
    stdout and does not stop the remaining files.

    Args:
        input_dir: Directory containing PDF files (not searched recursively).
        output_dir: Directory to save PNG files (defaults to
            ``input_dir/png_output``).  Created if it does not exist.
        dpi: Resolution handed to ``convert_from_path`` (default 300).
        thread_count: Thread count handed to ``convert_from_path``
            (default 4).

    Returns:
        None.  Progress and errors are printed to stdout.
    """
    input_path = Path(input_dir)

    if not input_path.exists():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    # A regular file passes exists(), but it holds no PDFs and the default
    # output directory cannot be created beneath it (mkdir would raise).
    if not input_path.is_dir():
        print(f"Error: '{input_dir}' is not a directory")
        return

    # Set output directory
    output_path = input_path / "png_output" if output_dir is None else Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Match the extension case-insensitively (.pdf, .PDF, ...) and sort so
    # the processing order does not depend on directory listing order.
    pdf_files = sorted(
        entry
        for entry in input_path.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )

    if not pdf_files:
        print(f"No PDF files found in '{input_dir}'")
        return

    print(f"Found {len(pdf_files)} PDF file(s)")
    print(f"Converting with {dpi} DPI for high quality...")

    failures = 0
    for pdf_file in pdf_files:
        # Per-file boundary: report the problem and carry on with the rest.
        try:
            print(f"\nProcessing: {pdf_file.name}")

            # Render all pages of the PDF to in-memory images
            images = convert_from_path(
                pdf_file,
                dpi=dpi,
                fmt='png',
                thread_count=thread_count,
            )
            multi_page = len(images) > 1

            # Save each page
            for page_number, image in enumerate(images, start=1):
                if multi_page:
                    output_filename = f"{pdf_file.stem}_page_{page_number}.png"
                else:
                    output_filename = f"{pdf_file.stem}.png"

                # optimize=True makes Pillow's PNG writer use the maximum
                # zlib level, so a separate compress_level would be ignored.
                image.save(output_path / output_filename, 'PNG', optimize=True)

                print(f"  Saved: {output_filename} ({image.size[0]}x{image.size[1]}px)")

            print(f"✓ Completed: {pdf_file.name} ({len(images)} page(s))")

        except Exception as e:
            failures += 1
            print(f"✗ Error processing {pdf_file.name}: {e}")

    print(f"\n{'='*50}")
    print("Conversion complete!")
    if failures:
        print(f"{failures} of {len(pdf_files)} file(s) failed")
    print(f"Output directory: {output_path.absolute()}")
|
|
|
|
def _sector_boxes(width, height, overlap_percent, rows, cols):
    """Return ``{sector_name: (left, top, right, bottom)}`` in row-major order.

    The image is cut into a ``rows`` x ``cols`` grid.  Every interior edge
    is pushed outwards by ``overlap_percent`` of the image width/height so
    neighbouring sectors share a strip; outer edges stay on the image
    border.  Names follow the pattern ``row<r>_col<c>``, both 1-based.
    """
    h_overlap = int(width * overlap_percent / 100)
    v_overlap = int(height * overlap_percent / 100)

    # Grid lines, including the two image borders (index 0 and the last).
    x_edges = [k * width // cols for k in range(cols + 1)]
    y_edges = [k * height // rows for k in range(rows + 1)]

    boxes = {}
    for r in range(rows):
        # Clamp so a large overlap can never reach outside the image,
        # which PIL's crop would otherwise pad with empty pixels.
        top = 0 if r == 0 else max(0, y_edges[r] - v_overlap)
        bottom = height if r == rows - 1 else min(height, y_edges[r + 1] + v_overlap)
        for c in range(cols):
            left = 0 if c == 0 else max(0, x_edges[c] - h_overlap)
            right = width if c == cols - 1 else min(width, x_edges[c + 1] + h_overlap)
            boxes[f"row{r + 1}_col{c + 1}"] = (left, top, right, bottom)
    return boxes


def split_images_into_sectors(input_dir, output_dir=None, overlap_percent=1, rows=5, cols=5):
    """Split every PNG in a directory into a grid of overlapping sectors.

    Each image is cropped into ``rows * cols`` pieces (25 with the default
    5x5 grid) that are saved as ``<stem>_row<r>_col<c>.png``.  A failure on
    one image is reported on stdout and does not stop the remaining files.

    Args:
        input_dir: Directory containing PNG files (not searched recursively).
        output_dir: Directory to save split images (defaults to
            ``input_dir/sectors``).  Created if it does not exist.
        overlap_percent: Overlap between neighbouring sectors, as a
            percentage of the full image width/height (default 1%).
        rows: Number of grid rows (default 5).
        cols: Number of grid columns (default 5).

    Returns:
        None.  Progress and errors are printed to stdout.

    Raises:
        ValueError: If ``rows`` or ``cols`` is smaller than 1.
    """
    if rows < 1 or cols < 1:
        raise ValueError("rows and cols must both be at least 1")

    input_path = Path(input_dir)

    if not input_path.exists():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    # A regular file passes exists(), but it holds no PNGs and the default
    # output directory cannot be created beneath it (mkdir would raise).
    if not input_path.is_dir():
        print(f"Error: '{input_dir}' is not a directory")
        return

    # Set output directory
    output_path = input_path / "sectors" if output_dir is None else Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Match the extension case-insensitively (.png, .PNG, ...) and sort so
    # the processing order does not depend on directory listing order.
    png_files = sorted(
        entry
        for entry in input_path.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".png"
    )

    if not png_files:
        print(f"No PNG files found in '{input_dir}'")
        return

    sector_count = rows * cols
    print(f"Found {len(png_files)} PNG file(s)")
    print(
        f"Splitting into {sector_count} sectors ({rows}x{cols} grid) "
        f"with {overlap_percent}% overlap..."
    )

    failures = 0
    for png_file in png_files:
        # Per-file boundary: report the problem and carry on with the rest.
        try:
            print(f"\nProcessing: {png_file.name}")

            # The context manager releases the underlying file handle even
            # when cropping or saving fails part-way through.
            with Image.open(png_file) as img:
                width, height = img.size
                boxes = _sector_boxes(width, height, overlap_percent, rows, cols)

                # Crop and save each sector
                for sector_name, bbox in boxes.items():
                    output_filename = f"{png_file.stem}_{sector_name}.png"

                    # optimize=True makes Pillow's PNG writer use the maximum
                    # zlib level, so a separate compress_level would be ignored.
                    img.crop(bbox).save(output_path / output_filename, 'PNG', optimize=True)

                    sector_width = bbox[2] - bbox[0]
                    sector_height = bbox[3] - bbox[1]
                    print(f"  Saved: {output_filename} ({sector_width}x{sector_height}px)")

            print(f"✓ Completed: {png_file.name} ({sector_count} sectors)")

        except Exception as e:
            failures += 1
            print(f"✗ Error processing {png_file.name}: {e}")

    print(f"\n{'='*50}")
    print("Splitting complete!")
    if failures:
        print(f"{failures} of {len(png_files)} file(s) failed")
    print(f"Output directory: {output_path.absolute()}")
|
|
if __name__ == "__main__":
    # Example usage: a two-step pipeline (PDF -> PNG -> sectors).

    input_directory = "./02_Fluxogramas"  # Change this to your PDF directory
    output_directory = "./pngs"  # PNGs written by step 1 and read by step 2

    # Step 1: Convert PDFs to PNGs with high DPI (300 is standard for print
    # quality). Currently switched off; uncomment to regenerate the PNGs.
    # convert_pdfs_to_png(input_directory, output_directory, dpi=300)

    # Step 2: Split PNGs into 25 sectors (5x5 grid) with 1% overlap
    split_images_into_sectors(output_directory, "./sectors", overlap_percent=1)