Files
AI-coodex-rekog-image-labeling/label/backend/topng.py
2026-05-14 14:07:04 -03:00

211 lines
8.7 KiB
Python

import os
from pathlib import Path
from pdf2image import convert_from_path
from PIL import Image
import json
def convert_pdfs_to_png(input_dir, output_dir=None, dpi=300):
    """
    Convert every PDF in a directory to PNG images, one PNG per page.

    A single-page PDF is saved as ``<stem>.png``; a multi-page PDF is saved
    as ``<stem>_page_<n>.png`` with pages numbered from 1. Progress and
    errors are reported with ``print``. A PDF that fails to convert is
    reported and skipped so the remaining files are still processed.

    Args:
        input_dir: Directory containing PDF files (not searched recursively)
        output_dir: Directory to save PNG files (defaults to
            input_dir/png_output); created if it does not exist
        dpi: Resolution for conversion (default 300 for high quality)

    Returns:
        None
    """
    input_path = Path(input_dir)
    # is_dir() rather than exists(): a path pointing at a regular file would
    # otherwise pass this check and crash in mkdir() below.
    if not input_path.is_dir():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    # Set output directory
    output_path = input_path / "png_output" if output_dir is None else Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all PDF files. The extension is compared case-insensitively so
    # that "scan.PDF" is not silently skipped on case-sensitive file systems;
    # sorting makes the processing order deterministic.
    pdf_files = sorted(
        entry
        for entry in input_path.iterdir()
        if entry.is_file() and entry.name.lower().endswith(".pdf")
    )
    if not pdf_files:
        print(f"No PDF files found in '{input_dir}'")
        return

    print(f"Found {len(pdf_files)} PDF file(s)")
    print(f"Converting with {dpi} DPI for high quality...")

    for pdf_file in pdf_files:
        # Best-effort batch: one broken PDF must not abort the whole run, so
        # any failure is reported and the loop moves on to the next file.
        try:
            print(f"\nProcessing: {pdf_file.name}")

            # Convert PDF to images (all pages are held in memory at once)
            images = convert_from_path(
                pdf_file,
                dpi=dpi,
                fmt='png',
                thread_count=4,  # Use multiple threads for faster conversion
            )
            page_count = len(images)

            # Save each page
            for page_number, image in enumerate(images, start=1):
                if page_count > 1:
                    output_filename = f"{pdf_file.stem}_page_{page_number}.png"
                else:
                    output_filename = f"{pdf_file.stem}.png"
                output_file = output_path / output_filename

                # NOTE: per the Pillow PNG writer documentation, optimize=True
                # forces the maximum compression level, so compress_level is
                # effectively ignored here. It is kept so the call itself is
                # unchanged.
                image.save(
                    output_file,
                    'PNG',
                    optimize=True,
                    compress_level=6,
                )
                print(f"  Saved: {output_filename} ({image.size[0]}x{image.size[1]}px)")

            print(f"✓ Completed: {pdf_file.name} ({page_count} page(s))")
        except Exception as e:
            print(f"✗ Error processing {pdf_file.name}: {str(e)}")

    print(f"\n{'='*50}")
    print("Conversion complete!")
    print(f"Output directory: {output_path.absolute()}")
def _axis_spans(length, overlap, grid_size):
    """Return grid_size (start, end) pixel spans covering ``length`` with overlap."""
    spans = []
    last = grid_size - 1
    for index in range(grid_size):
        # Outer edges sit exactly on the image border; inner edges are pushed
        # outwards by ``overlap`` so neighbouring sectors share a strip.
        start = 0 if index == 0 else index * length // grid_size - overlap
        end = length if index == last else (index + 1) * length // grid_size + overlap
        # Clamp so a large overlap can never reach outside the image.
        spans.append((max(0, start), min(length, end)))
    return spans


def _sector_boxes(width, height, overlap_percent, grid_size):
    """Map 'row<r>_col<c>' names to (left, top, right, bottom) crop boxes."""
    column_spans = _axis_spans(width, int(width * overlap_percent / 100), grid_size)
    row_spans = _axis_spans(height, int(height * overlap_percent / 100), grid_size)
    boxes = {}
    # Row-major order: row1_col1, row1_col2, ..., matching the file naming.
    for row, (top, bottom) in enumerate(row_spans, start=1):
        for col, (left, right) in enumerate(column_spans, start=1):
            boxes[f"row{row}_col{col}"] = (left, top, right, bottom)
    return boxes


def split_images_into_sectors(input_dir, output_dir=None, overlap_percent=1, grid_size=5):
    """
    Split PNG images in a directory into a grid of overlapping sectors.

    With the default ``grid_size`` of 5 each image yields 25 sectors (5x5
    grid), saved as ``<stem>_row<r>_col<c>.png``. Inner sector edges are
    extended by the overlap so adjacent sectors share a strip of pixels;
    sectors never extend past the image border. Progress and errors are
    reported with ``print``. An image that fails is reported and skipped so
    the remaining files are still processed.

    Args:
        input_dir: Directory containing PNG files (not searched recursively)
        output_dir: Directory to save split images (defaults to
            input_dir/sectors); created if it does not exist
        overlap_percent: Overlap added on each inner edge, as a percentage of
            the image width (horizontal) or height (vertical) (default 1%)
        grid_size: Number of rows and of columns in the grid (default 5)

    Returns:
        None
    """
    input_path = Path(input_dir)
    # is_dir() rather than exists(): a path pointing at a regular file would
    # otherwise pass this check and crash in mkdir() below.
    if not input_path.is_dir():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    if not isinstance(grid_size, int) or grid_size < 1:
        print(f"Error: grid_size must be a positive integer, got {grid_size!r}")
        return

    # Set output directory
    output_path = input_path / "sectors" if output_dir is None else Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all PNG files. The extension is compared case-insensitively so
    # that "page.PNG" is not silently skipped on case-sensitive file systems;
    # sorting makes the processing order deterministic.
    png_files = sorted(
        entry
        for entry in input_path.iterdir()
        if entry.is_file() and entry.name.lower().endswith(".png")
    )
    if not png_files:
        print(f"No PNG files found in '{input_dir}'")
        return

    sector_count = grid_size * grid_size
    print(f"Found {len(png_files)} PNG file(s)")
    print(
        f"Splitting into {sector_count} sectors ({grid_size}x{grid_size} grid) "
        f"with {overlap_percent}% overlap..."
    )

    for png_file in png_files:
        # Best-effort batch: one unreadable image must not abort the whole
        # run, so any failure is reported and the loop moves on.
        try:
            print(f"\nProcessing: {png_file.name}")

            # The context manager releases the underlying file handle even
            # when cropping or saving raises.
            with Image.open(png_file) as img:
                width, height = img.size
                sectors = _sector_boxes(width, height, overlap_percent, grid_size)

                # Crop and save each sector
                for sector_name, bbox in sectors.items():
                    sector_img = img.crop(bbox)
                    output_filename = f"{png_file.stem}_{sector_name}.png"
                    output_file = output_path / output_filename

                    # NOTE: per the Pillow PNG writer documentation,
                    # optimize=True forces the maximum compression level, so
                    # compress_level is effectively ignored here.
                    sector_img.save(
                        output_file,
                        'PNG',
                        optimize=True,
                        compress_level=6,
                    )

                    sector_width = bbox[2] - bbox[0]
                    sector_height = bbox[3] - bbox[1]
                    print(f"  Saved: {output_filename} ({sector_width}x{sector_height}px)")

            print(f"✓ Completed: {png_file.name} ({len(sectors)} sectors)")
        except Exception as e:
            print(f"✗ Error processing {png_file.name}: {str(e)}")

    print(f"\n{'='*50}")
    print("Splitting complete!")
    print(f"Output directory: {output_path.absolute()}")
if __name__ == "__main__":
    # Example pipeline. Adjust the directories below to match your data.
    input_directory = "./02_Fluxogramas"  # Directory holding the source PDFs
    output_directory = "./pngs"  # Directory holding the rendered PNG pages

    # Step 1 (currently disabled): render the PDFs to PNG at 300 DPI,
    # the usual print-quality resolution.
    #convert_pdfs_to_png(input_directory, output_directory, dpi=300)

    # Step 2: cut every rendered PNG into 25 overlapping sectors (5x5 grid).
    split_images_into_sectors(output_directory, "./sectors", overlap_percent=1)