import json
import os
from pathlib import Path

from pdf2image import convert_from_path
from PIL import Image


def convert_pdfs_to_png(input_dir, output_dir=None, dpi=300):
    """
    Convert all PDFs in a directory to PNG images, one file per page.

    Single-page PDFs are saved as "<stem>.png"; multi-page PDFs are saved as
    "<stem>_page_<n>.png" with pages numbered from 1. A failure on one PDF is
    reported and does not stop the remaining files from being processed.

    Args:
        input_dir: Directory containing PDF files
        output_dir: Directory to save PNG files (defaults to input_dir/png_output)
        dpi: Resolution for conversion (default 300 for high quality)
    """
    input_path = Path(input_dir)

    if not input_path.exists():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    # Set output directory
    if output_dir is None:
        output_path = input_path / "png_output"
    else:
        output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all PDF files; sorted so the processing order is deterministic
    pdf_files = sorted(input_path.glob("*.pdf"))

    if not pdf_files:
        print(f"No PDF files found in '{input_dir}'")
        return

    print(f"Found {len(pdf_files)} PDF file(s)")
    print(f"Converting with {dpi} DPI for high quality...")

    for pdf_file in pdf_files:
        # Broad catch on purpose: this is the per-file boundary of a batch
        # job, so one broken PDF is reported and the batch keeps going.
        try:
            print(f"\nProcessing: {pdf_file.name}")

            # Convert PDF to images
            images = convert_from_path(
                pdf_file,
                dpi=dpi,
                fmt='png',
                thread_count=4,  # Use multiple threads for faster conversion
            )

            multi_page = len(images) > 1

            # Save each page
            for i, image in enumerate(images, start=1):
                if multi_page:
                    output_filename = f"{pdf_file.stem}_page_{i}.png"
                else:
                    output_filename = f"{pdf_file.stem}.png"

                output_file = output_path / output_filename

                # optimize=True already makes Pillow use the strongest zlib
                # level, so a separate compress_level would have no effect.
                image.save(output_file, 'PNG', optimize=True)

                print(f"  Saved: {output_filename} ({image.size[0]}x{image.size[1]}px)")

            print(f"✓ Completed: {pdf_file.name} ({len(images)} page(s))")

        except Exception as e:
            print(f"✗ Error processing {pdf_file.name}: {e}")

    print(f"\n{'='*50}")
    print("Conversion complete!")
    print(f"Output directory: {output_path.absolute()}")


def _axis_spans(length, parts, overlap):
    """Return (start, end) pixel spans splitting `length` into `parts` overlapping cells."""
    cuts = [k * length // parts for k in range(parts + 1)]
    spans = []
    for k in range(parts):
        # Outer edges sit exactly on the image border; interior edges are
        # pushed outwards by `overlap` and clamped so a crop never leaves
        # the image (Pillow would otherwise pad the crop with blank pixels).
        start = 0 if k == 0 else max(0, cuts[k] - overlap)
        end = length if k == parts - 1 else min(length, cuts[k + 1] + overlap)
        spans.append((start, end))
    return spans


def _sector_boxes(width, height, grid_size, h_overlap, v_overlap):
    """Map 'row<r>_col<c>' names to (left, top, right, bottom) boxes, row by row."""
    columns = _axis_spans(width, grid_size, h_overlap)
    rows = _axis_spans(height, grid_size, v_overlap)
    return {
        f"row{r}_col{c}": (left, top, right, bottom)
        for r, (top, bottom) in enumerate(rows, start=1)
        for c, (left, right) in enumerate(columns, start=1)
    }


def split_images_into_sectors(input_dir, output_dir=None, overlap_percent=1, grid_size=5):
    """
    Split PNG images in a directory into a grid of overlapping sectors.

    With the default grid_size of 5 each image yields 25 sectors (5x5 grid).
    Every sector is saved as "<stem>_row<r>_col<c>.png", with rows and columns
    numbered from 1. Neighbouring sectors overlap: each interior edge is moved
    outwards by overlap_percent of the image width (for vertical edges) or
    height (for horizontal edges), and is clamped to the image bounds. A
    failure on one image is reported and does not stop the remaining files.

    Args:
        input_dir: Directory containing PNG files
        output_dir: Directory to save split images (defaults to input_dir/sectors)
        overlap_percent: Percentage of overlap (default 1%)
        grid_size: Number of rows and of columns in the grid (default 5)
    """
    input_path = Path(input_dir)

    if not input_path.exists():
        print(f"Error: Directory '{input_dir}' does not exist")
        return

    if grid_size < 1:
        print(f"Error: grid_size must be at least 1 (got {grid_size})")
        return

    # Set output directory
    if output_dir is None:
        output_path = input_path / "sectors"
    else:
        output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all PNG files; sorted so the processing order is deterministic
    png_files = sorted(input_path.glob("*.png"))

    if not png_files:
        print(f"No PNG files found in '{input_dir}'")
        return

    print(f"Found {len(png_files)} PNG file(s)")
    print(
        f"Splitting into {grid_size * grid_size} sectors "
        f"({grid_size}x{grid_size} grid) with {overlap_percent}% overlap..."
    )

    for png_file in png_files:
        # Broad catch on purpose: this is the per-file boundary of a batch
        # job, so one unreadable image is reported and the batch keeps going.
        try:
            print(f"\nProcessing: {png_file.name}")

            # The context manager closes the source file once all sectors
            # of this image have been written.
            with Image.open(png_file) as img:
                width, height = img.size

                # Calculate overlap in pixels
                h_overlap = int(width * overlap_percent / 100)
                v_overlap = int(height * overlap_percent / 100)

                # Format: (left, top, right, bottom)
                sectors = _sector_boxes(width, height, grid_size, h_overlap, v_overlap)

                # Crop and save each sector
                for sector_name, bbox in sectors.items():
                    sector_img = img.crop(bbox)

                    output_filename = f"{png_file.stem}_{sector_name}.png"
                    output_file = output_path / output_filename

                    # optimize=True already makes Pillow use the strongest
                    # zlib level, so compress_level would have no effect.
                    sector_img.save(output_file, 'PNG', optimize=True)

                    sector_width = bbox[2] - bbox[0]
                    sector_height = bbox[3] - bbox[1]
                    print(f"  Saved: {output_filename} ({sector_width}x{sector_height}px)")

            print(f"✓ Completed: {png_file.name} ({len(sectors)} sectors)")

        except Exception as e:
            print(f"✗ Error processing {png_file.name}: {e}")

    print(f"\n{'='*50}")
    print("Splitting complete!")
    print(f"Output directory: {output_path.absolute()}")


if __name__ == "__main__":
    # Example usage

    # Step 1: Convert PDFs to PNGs
    input_directory = "./02_Fluxogramas"  # Change this to your PDF directory
    output_directory = "./pngs"  # Optional: specify output directory

    # Convert with high DPI (300 is standard for print quality).
    # Disabled here; uncomment to (re)generate the PNGs before splitting.
    # convert_pdfs_to_png(input_directory, output_directory, dpi=300)

    # Step 2: Split PNGs into 25 sectors (5x5 grid)
    split_images_into_sectors(output_directory, "./sectors", overlap_percent=1)