"""Extract the main colors of a PDF page or image and save one layer image per color."""
import os
|
|
import shutil
|
|
import numpy as np
|
|
from PIL import Image
|
|
from pdf2image import convert_from_path
|
|
|
|
def clear_output_folder(folder):
    """Reset *folder* to a freshly created, empty directory.

    Whatever currently exists at that path is removed as a directory tree
    first; the directory is then created again (including missing parents).
    """
    already_present = os.path.exists(folder)
    if already_present:
        # Drop the previous run's contents in one go.
        shutil.rmtree(folder)

    os.makedirs(folder)
|
|
|
|
def load_image(file_path, dpi=300):
    """Load image from PDF or image file and return it in RGB mode.

    Parameters:
    - file_path: Path to the input. A '.pdf' extension (case-insensitive)
      is rendered through pdf2image; anything else is opened with PIL.
    - dpi: Rendering resolution, used for PDF input only.

    Returns:
    The first PDF page, or the opened image, converted to 'RGB' mode when
    it is not already in that mode.

    Raises:
    ValueError: If the PDF conversion yields no page at all.
    """
    file_ext = os.path.splitext(file_path)[1].lower()

    if file_ext == '.pdf':
        print(f"Converting PDF to image (DPI: {dpi})...")
        # Only the first page is ever used, so restrict rendering to it
        # instead of rasterizing the whole document at full resolution.
        images = convert_from_path(
            file_path, dpi=dpi, fmt='png', first_page=1, last_page=1
        )
        if not images:
            raise ValueError(f"No page could be rendered from '{file_path}'")
        img = images[0]  # First page only
        print(f" Converted: {img.size[0]}x{img.size[1]}")
    else:
        print("Loading image...")
        img = Image.open(file_path)
        print(f" Loaded: {img.size[0]}x{img.size[1]}")

    # The color analysis reshapes pixels to (-1, 3), so it needs exactly
    # three channels per pixel.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    return img
|
|
|
|
def find_main_colors(img, color_threshold=30, min_percentage=0.5):
    """
    Find main distinct colors in image.

    Near-white pixels (>= 250 in all RGB channels) are treated as background
    and ignored; every percentage is relative to the remaining pixels.

    Algorithm:
    1. Find most common color that is not yet assigned to a group
    2. Group all unassigned colors within distance threshold of it
       (Euclidean distance in RGB space)
    3. Repeat until no colors are left
    4. Keep only the groups that reach min_percentage

    Parameters:
    - img: RGB image; anything np.array() turns into an (H, W, 3) array
    - color_threshold: Maximum distance between colors to group them (0-441)
      Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
    - min_percentage: Minimum percentage to be a "main" color

    Returns:
    List of dicts, ordered by descending frequency of the base color, with
    keys 'color' (the base color as a 3-element numpy array), 'count'
    (pixels in the group), 'percentage' and 'num_variants' (distinct colors
    in the group). Empty list when the image has no non-white pixel.
    """
    print("\nAnalyzing colors...")
    print(f" Color distance threshold: {color_threshold}")
    print(f" Minimum percentage: {min_percentage}%")

    # Flatten to one row per pixel
    pixels = np.array(img).reshape(-1, 3)
    total_pixels = len(pixels)

    if total_pixels == 0:
        # Nothing to analyze; also avoids dividing by zero below.
        print(" Error: Image has no pixels!")
        return []

    # Remove white background (>= 250 in all RGB channels)
    is_white = (pixels[:, 0] >= 250) & (pixels[:, 1] >= 250) & (pixels[:, 2] >= 250)
    white_count = int(np.sum(is_white))
    pixels = pixels[~is_white]

    print(f" Total pixels: {total_pixels:,}")
    print(f" White background: {white_count:,} ({white_count/total_pixels*100:.1f}%) - IGNORED")
    print(f" Color pixels: {len(pixels):,} ({len(pixels)/total_pixels*100:.1f}%)")

    if len(pixels) == 0:
        print(" Error: Image is entirely white!")
        return []

    # Get unique colors and their counts
    unique_colors, counts = np.unique(pixels, axis=0, return_counts=True)
    print(f" Unique colors found: {len(unique_colors):,}")

    # Greedy grouping by frequency
    print("\n Grouping colors (greedy by frequency)...")

    color_groups = []
    remaining = np.ones(len(unique_colors), dtype=bool)  # Colors still unassigned
    # Working copy of the counts: an entry is zeroed once its color is
    # assigned, so argmax always lands on the most common unassigned color.
    available_counts = counts.copy()
    # Float copy made once so the uint8 subtraction below cannot wrap around.
    colors_as_float = unique_colors.astype(float)

    iteration = 0
    while remaining.any():
        iteration += 1

        most_common_idx = np.argmax(available_counts)
        base_color = unique_colors[most_common_idx]

        # Euclidean distance from base_color to all colors
        diff = colors_as_float - colors_as_float[most_common_idx]
        distances = np.sqrt(np.sum(diff ** 2, axis=1))

        within_threshold = (distances <= color_threshold) & remaining
        # The base color always belongs to its own group. Without this a
        # negative or NaN threshold would consume nothing and loop forever.
        within_threshold[most_common_idx] = True

        # Mark these colors as used
        remaining[within_threshold] = False
        available_counts[within_threshold] = 0

        num_variants = int(np.count_nonzero(within_threshold))
        total_count = int(np.sum(counts[within_threshold]))
        percentage = (total_count / len(pixels)) * 100

        color_groups.append({
            'color': base_color,
            'count': total_count,
            'percentage': percentage,
            'num_variants': num_variants
        })

        # Plain ints keep the output free of numpy scalar wrappers.
        base_rgb = tuple(int(channel) for channel in base_color)
        print(f" Group {iteration}: RGB{base_rgb} -> {num_variants} variants, {percentage:.1f}%")

    print(f" Created {len(color_groups)} color groups")

    # Filter by minimum percentage
    color_groups = [g for g in color_groups if g['percentage'] >= min_percentage]

    print(f" Main colors (>= {min_percentage}%): {len(color_groups)}")

    # Share of the non-white pixels covered by the groups that were kept
    total_percentage = sum(g['percentage'] for g in color_groups)
    print(f" Total percentage: {total_percentage:.1f}%")

    # Display results
    print(f"\n{'='*60}")
    print("MAIN COLORS:")
    print(f"{'='*60}")
    for i, group in enumerate(color_groups, 1):
        r, g, b = group['color']
        print(f"{i}. RGB({r:3d}, {g:3d}, {b:3d}) - {group['percentage']:5.1f}% ({group['count']:,} pixels, {group['num_variants']} variants)")

    return color_groups
|
|
|
|
def create_color_layers(img, color_groups, color_threshold, output_folder='output'):
    """Create one image per color group showing only that group's pixels.

    Each layer has the size of the input, a white background, and the
    original pixel values wherever the pixel color lies within
    color_threshold (Euclidean RGB distance) of the group's base color.
    Near-white pixels (>= 250 in all channels) never appear in a layer.

    A color is written to at most one layer: the first group in
    color_groups that matches it. This mirrors the greedy,
    most-frequent-first grouping and keeps the layers from overlapping.

    Parameters:
    - img: RGB image; anything np.array() turns into an (H, W, 3) array
    - color_groups: Sequence of dicts with a 'color' entry (3 channel values)
    - color_threshold: Maximum distance from a base color to join its layer
    - output_folder: Directory for the PNG files; created if missing

    Files are named 'layer_<index>_rgb<r>_<g>_<b>.png' with a 1-based index.
    """
    print("\nCreating color layers...")

    os.makedirs(output_folder, exist_ok=True)

    # Flatten to one row per pixel, remembering the size for the layers
    pixels = np.array(img)
    h, w = pixels.shape[:2]
    original_pixels = pixels.reshape(-1, 3)

    # White background is excluded from matching
    is_white = (original_pixels[:, 0] >= 250) & (original_pixels[:, 1] >= 250) & (original_pixels[:, 2] >= 250)

    # Match on unique colors, then map the result back onto pixels
    unique_colors, inverse = np.unique(original_pixels[~is_white], axis=0, return_inverse=True)
    # Some NumPy releases return a 2-D inverse when axis is given; the mask
    # assignment below needs one index per non-white pixel.
    inverse = inverse.reshape(-1)

    unique_as_float = unique_colors.astype(float)
    claimed = np.zeros(len(unique_colors), dtype=bool)  # Colors already placed in a layer

    for i, group in enumerate(color_groups, 1):
        base_color = np.asarray(group['color'])

        # Distance from base_color to every unique color
        diff = unique_as_float - base_color.astype(float)
        distances = np.sqrt(np.sum(diff ** 2, axis=1))

        # Colors in reach that no earlier group has taken
        in_group = (distances <= color_threshold) & ~claimed
        claimed |= in_group

        # Expand the per-color decision to a per-pixel mask
        pixel_mask = np.zeros(len(original_pixels), dtype=bool)
        pixel_mask[~is_white] = in_group[inverse]

        # White canvas; layer_flat is a view, so writing to it fills layer
        layer = np.full((h, w, 3), 255, dtype=np.uint8)
        layer_flat = layer.reshape(-1, 3)
        layer_flat[pixel_mask] = original_pixels[pixel_mask]

        # Save layer
        r, g, b = (int(channel) for channel in base_color)
        filename = f'layer_{i}_rgb{r}_{g}_{b}.png'
        filepath = os.path.join(output_folder, filename)
        Image.fromarray(layer).save(filepath)

        pixel_count = int(np.count_nonzero(pixel_mask))
        print(f" Layer {i}: {filename} ({pixel_count:,} pixels)")
|
|
|
|
def save_results(color_groups, output_folder='output'):
    """Write the color palette as text to 'main_colors.txt' in output_folder.

    The file starts with a short header, followed by one numbered entry per
    group listing its RGB value, share and pixel count, number of color
    variants and hex code. The destination path is printed afterwards.
    """
    output_path = os.path.join(output_folder, 'main_colors.txt')

    header_lines = (
        "MAIN COLORS (by frequency)\n",
        "=" * 60 + "\n",
        "Note: White background ignored\n",
        " Similar colors grouped together\n\n",
    )

    with open(output_path, 'w') as report:
        report.writelines(header_lines)

        for rank, entry in enumerate(color_groups, start=1):
            red, green, blue = entry['color']
            report.writelines((
                f"{rank}. RGB({red}, {green}, {blue})\n",
                f" {entry['percentage']:.2f}% ({entry['count']:,} pixels)\n",
                f" {entry['num_variants']} color variants\n",
                f" Hex: #{red:02X}{green:02X}{blue:02X}\n\n",
            ))

    print(f"\nResults saved to: {output_path}")
|
|
|
|
def main(file_path, color_threshold=30, min_percentage=0.5, dpi=300, output_folder='output'):
    """Run the whole extraction: load, analyze, write layers and palette.

    Parameters:
    - file_path: Input PDF or image
    - color_threshold: Passed to the color grouping and to the layer creation
    - min_percentage: Passed to the color grouping
    - dpi: Passed to the image loader
    - output_folder: Directory that is cleared and then receives
      'original.png', the layer images and the palette file

    Returns None. When no main color is found, only 'original.png' is
    written.
    """
    print("="*60)
    print("COLOR EXTRACTOR - Find Main Colors")
    print("="*60)

    # Load first: if the input cannot be read, the previous contents of the
    # output folder are left untouched instead of being wiped for nothing.
    print(f"\nInput: {file_path}")
    img = load_image(file_path, dpi)

    # Clear output
    clear_output_folder(output_folder)

    # Save original
    original_path = os.path.join(output_folder, 'original.png')
    img.save(original_path)

    # Find main colors
    color_groups = find_main_colors(img, color_threshold, min_percentage)

    if not color_groups:
        print("\nNo main colors found.")
        return

    # Create color layers
    create_color_layers(img, color_groups, color_threshold, output_folder)

    # Save results
    save_results(color_groups, output_folder)

    print(f"\n{'='*60}")
    print(f"✓ COMPLETE - Found {len(color_groups)} main colors")
    print(f" Created {len(color_groups)} color layer images")
    print(f"{'='*60}")
|
|
|
|
if __name__ == "__main__":
    # Input file: a PDF or an image such as "input.png" / "input.jpg".
    file_path = "input.pdf"

    if os.path.exists(file_path):
        # Tuning notes
        #
        # color_threshold: RGB distance (0-441) below which colors are grouped.
        #   Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²), for example:
        #     RGB(0,0,0) to RGB(0,0,1)    -> 1
        #     RGB(0,0,0) to RGB(10,10,10) -> ~17
        #     RGB(0,0,0) to RGB(30,30,30) -> ~52
        #   Rough guide:
        #     10-20:  very strict, only near-identical colors are grouped
        #     30-50:  good for most diagrams
        #     60-100: loose, aggressive grouping
        #   The 120 used below is looser still than that last range.
        #
        # min_percentage: share of the non-white pixels a group needs to
        #   count as a "main" color (0.1 keeps small groups, 1.0 only
        #   larger ones). The 3 used below drops every group under 3%.
        #
        # dpi: resolution for PDF conversion (300 recommended).
        main(
            file_path=file_path,
            color_threshold=120,  # Very loose grouping
            min_percentage=3,     # Groups below 3% are not "main"
            dpi=300,              # PDF resolution
            output_folder='output',
        )
    else:
        print(f"Error: '{file_path}' not found!")
        print("Usage: Place your file as 'input.pdf' or 'input.png'")