Initial commit
This commit is contained in:
275
label/cores/cores.py
Normal file
275
label/cores/cores.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import os
|
||||
import shutil
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from pdf2image import convert_from_path
|
||||
|
||||
def clear_output_folder(folder):
    """Reset ``folder`` to an empty directory.

    An existing directory at ``folder`` is removed together with everything
    inside it. A regular file or symlink occupying the path is deleted so
    that the directory can be created in its place. Missing parent
    directories are created as needed.

    Parameters:
    - folder: Path of the output directory to (re)create.

    Raises:
    - OSError: If the existing path cannot be removed or the directory
      cannot be created.
    """
    if os.path.islink(folder) or os.path.isfile(folder):
        # shutil.rmtree() refuses symlinks and fails on plain files, so a
        # non-directory entry in the way has to be unlinked instead.
        os.remove(folder)
    elif os.path.isdir(folder):
        shutil.rmtree(folder)

    os.makedirs(folder, exist_ok=True)
|
||||
|
||||
def load_image(file_path, dpi=300):
    """Load a PDF (first page only) or an image file as an RGB image.

    Parameters:
    - file_path: Path to the input file. A ``.pdf`` extension
      (case-insensitive) selects PDF rasterization; anything else is opened
      with ``Image.open``.
    - dpi: Resolution used when rasterizing a PDF. Ignored for images.

    Returns:
    - The loaded image, converted to mode ``'RGB'`` when it was not already.

    Raises:
    - ValueError: If the PDF conversion returns no pages.
    """
    file_ext = os.path.splitext(file_path)[1].lower()

    if file_ext == '.pdf':
        print(f"Converting PDF to image (DPI: {dpi})...")
        # Only the first page is used, so do not rasterize the whole document.
        images = convert_from_path(
            file_path, dpi=dpi, fmt='png', first_page=1, last_page=1
        )
        if not images:
            raise ValueError(f"No pages could be rendered from '{file_path}'")
        img = images[0]  # First page only
        print(f" Converted: {img.size[0]}x{img.size[1]}")
    else:
        print("Loading image...")
        with Image.open(file_path) as img:
            # Image.open() is lazy: read the pixel data now so the file
            # handle can be released when the ``with`` block exits.
            img.load()
        print(f" Loaded: {img.size[0]}x{img.size[1]}")

    # Convert to RGB
    if img.mode != 'RGB':
        img = img.convert('RGB')

    return img
|
||||
|
||||
def find_main_colors(img, color_threshold=30, min_percentage=0.5):
    """
    Find main distinct colors in image.

    Algorithm:
    1. Find most common color
    2. Group all colors within distance threshold (Euclidean distance in RGB space)
    3. Remove those colors
    4. Find next most common color
    5. Repeat until no colors left

    Pixels whose R, G and B values are all >= 250 are treated as white
    background and ignored. Percentages are relative to the remaining
    (non-white) pixels.

    Parameters:
    - img: Image (or array) that ``np.array`` turns into H x W x 3 RGB data
    - color_threshold: Maximum distance between colors to group them (0-441)
      Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
    - min_percentage: Minimum percentage to be a "main" color

    Returns:
    - List of dicts in discovery order, each with the keys ``'color'``
      (array of the group's base R, G, B values), ``'count'`` (pixels in the
      group), ``'percentage'`` (share of non-white pixels) and
      ``'num_variants'`` (distinct colors merged into the group). Empty when
      the image has no non-white pixels.

    Raises:
    - ValueError: If ``color_threshold`` is negative or NaN.
    """
    # A negative (or NaN) threshold would never claim even the base color, so
    # the greedy loop below could not make progress.
    if not color_threshold >= 0:
        raise ValueError(f"color_threshold must be >= 0, got {color_threshold!r}")

    print("\nAnalyzing colors...")
    print(f" Color distance threshold: {color_threshold}")
    print(f" Minimum percentage: {min_percentage}%")

    # Get all pixels as a flat (N, 3) array
    pixels = np.array(img).reshape(-1, 3)
    total_pixels = len(pixels)
    if total_pixels == 0:
        print(" Error: Image has no pixels!")
        return []

    # Remove white background (>= 250 in all RGB channels)
    is_white = np.all(pixels >= 250, axis=1)
    white_count = int(np.count_nonzero(is_white))
    pixels = pixels[~is_white]
    color_count = len(pixels)

    print(f" Total pixels: {total_pixels:,}")
    print(f" White background: {white_count:,} ({white_count / total_pixels * 100:.1f}%) - IGNORED")
    print(f" Color pixels: {color_count:,} ({color_count / total_pixels * 100:.1f}%)")

    if color_count == 0:
        print(" Error: Image is entirely white!")
        return []

    # Get unique colors and their counts
    unique_colors, counts = np.unique(pixels, axis=0, return_counts=True)
    print(f" Unique colors found: {len(unique_colors):,}")

    # Greedy grouping by frequency
    print("\n Grouping colors (greedy by frequency)...")

    color_groups = []
    remaining = np.ones(len(unique_colors), dtype=bool)  # Colors not yet assigned
    colors_as_float = unique_colors.astype(float)  # Avoids uint8 wrap-around; cast once

    iteration = 0
    while remaining.any():
        iteration += 1

        # Most common remaining color. Every count is >= 1, so masking the
        # already-used colors with 0 can never select one of them.
        most_common_idx = int(np.argmax(np.where(remaining, counts, 0)))
        base_color = unique_colors[most_common_idx].copy()

        # Euclidean distance from base_color to all colors
        diff = colors_as_float - colors_as_float[most_common_idx]
        distances = np.sqrt(np.sum(diff ** 2, axis=1))

        # Claim every still-available color within the threshold. The base
        # color itself has distance 0, so each pass claims at least one.
        within_threshold = (distances <= color_threshold) & remaining
        remaining[within_threshold] = False

        num_variants = int(np.count_nonzero(within_threshold))
        total_count = int(counts[within_threshold].sum())
        percentage = total_count / color_count * 100

        color_groups.append({
            'color': base_color,
            'count': total_count,
            'percentage': percentage,
            'num_variants': num_variants,
        })

        # Plain ints keep the log readable on NumPy 2, where the repr of a
        # scalar is e.g. "np.uint8(255)".
        rgb = tuple(int(c) for c in base_color)
        print(f" Group {iteration}: RGB{rgb} -> {num_variants} variants, {percentage:.1f}%")

    print(f" Created {len(color_groups)} color groups")

    # Filter by minimum percentage
    color_groups = [g for g in color_groups if g['percentage'] >= min_percentage]
    print(f" Main colors (>= {min_percentage}%): {len(color_groups)}")

    # Verify percentages
    total_percentage = sum(g['percentage'] for g in color_groups)
    print(f" Total percentage: {total_percentage:.1f}%")

    # Display results
    print(f"\n{'=' * 60}")
    print("MAIN COLORS:")
    print(f"{'=' * 60}")
    for i, group in enumerate(color_groups, 1):
        r, g, b = (int(c) for c in group['color'])
        print(f"{i}. RGB({r:3d}, {g:3d}, {b:3d}) - {group['percentage']:5.1f}% ({group['count']:,} pixels, {group['num_variants']} variants)")

    return color_groups
|
||||
|
||||
def create_color_layers(img, color_groups, color_threshold, output_folder='output'):
    """Create one image per color group showing only that color.

    Each layer is a white canvas of the input's size onto which the original
    pixels belonging to the group are copied. It is saved as
    ``layer_<index>_rgb<R>_<G>_<B>.png`` inside ``output_folder``, which is
    created if missing.

    Groups are processed in the given order and every distinct color is
    assigned to the first group whose base color lies within
    ``color_threshold`` of it, so a pixel never appears in more than one
    layer. Pixels with R, G and B all >= 250 are treated as white background
    and never copied.

    Parameters:
    - img: Image (or array) that ``np.array`` turns into H x W x 3 RGB data
    - color_groups: Sequence of dicts with a ``'color'`` entry holding the
      group's base R, G, B values
    - color_threshold: Maximum Euclidean RGB distance from the base color
    - output_folder: Directory receiving the layer images
    """
    print("\nCreating color layers...")

    # Get all pixels
    pixels = np.array(img)
    h, w = pixels.shape[:2]
    original_pixels = pixels.reshape(-1, 3)

    # Remove white background for grouping
    is_white = np.all(original_pixels >= 250, axis=1)

    # Get unique colors for matching; ``inverse`` maps each non-white pixel to
    # its row in ``unique_colors``.
    unique_colors, inverse = np.unique(original_pixels[~is_white], axis=0, return_inverse=True)
    # The shape of ``inverse`` with ``axis=`` differs between NumPy releases;
    # flatten it so it can always index the 1-D ``in_group`` mask.
    inverse = inverse.reshape(-1)
    colors_as_float = unique_colors.astype(float)  # Avoids uint8 wrap-around; cast once

    os.makedirs(output_folder, exist_ok=True)

    # Colors not yet drawn into any layer. Without this, a color close to two
    # base colors would show up in both layers.
    unclaimed = np.ones(len(unique_colors), dtype=bool)

    # For each color group, create a layer
    for i, group in enumerate(color_groups, 1):
        base_color = np.asarray(group['color'])

        # Calculate distances from base_color to all unique colors
        diff = colors_as_float - base_color.astype(float)
        distances = np.sqrt(np.sum(diff ** 2, axis=1))

        # Unique colors that belong to this group
        in_group = (distances <= color_threshold) & unclaimed
        unclaimed &= ~in_group

        # Create mask for pixels in this group
        pixel_mask = np.zeros(len(original_pixels), dtype=bool)
        pixel_mask[~is_white] = in_group[inverse]

        # Create layer image (white background) and copy the group's pixels
        layer_flat = np.full((h * w, 3), 255, dtype=np.uint8)
        layer_flat[pixel_mask] = original_pixels[pixel_mask]
        layer = layer_flat.reshape(h, w, 3)

        # Save layer
        r, g, b = (int(c) for c in base_color)
        filename = f'layer_{i}_rgb{r}_{g}_{b}.png'
        filepath = os.path.join(output_folder, filename)
        Image.fromarray(layer).save(filepath)

        pixel_count = int(np.count_nonzero(pixel_mask))
        print(f" Layer {i}: {filename} ({pixel_count:,} pixels)")
|
||||
|
||||
def save_results(color_groups, output_folder='output'):
    """Save color palette to file.

    Writes ``main_colors.txt`` into ``output_folder`` (created if missing)
    with one entry per group: RGB values, percentage, pixel count, number of
    color variants and the hex code.

    Parameters:
    - color_groups: Sequence of dicts with the keys ``'color'`` (three
      integer-like R, G, B values), ``'percentage'``, ``'count'`` and
      ``'num_variants'``
    - output_folder: Directory receiving ``main_colors.txt``
    """
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, 'main_colors.txt')

    lines = [
        "MAIN COLORS (by frequency)\n",
        "=" * 60 + "\n",
        "Note: White background ignored\n",
        " Similar colors grouped together\n\n",
    ]

    for i, group in enumerate(color_groups, 1):
        # Plain ints so the ``02X`` hex format works for any integer-like
        # component type (e.g. NumPy scalars).
        r, g, b = (int(c) for c in group['color'])
        lines.append(f"{i}. RGB({r}, {g}, {b})\n")
        lines.append(f" {group['percentage']:.2f}% ({group['count']:,} pixels)\n")
        lines.append(f" {group['num_variants']} color variants\n")
        lines.append(f" Hex: #{r:02X}{g:02X}{b:02X}\n\n")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"\nResults saved to: {output_path}")
|
||||
|
||||
def main(file_path, color_threshold=30, min_percentage=0.5, dpi=300, output_folder='output'):
    """Run the full pipeline: load input, find main colors, write outputs.

    The input is loaded before the output folder is cleared, so an unreadable
    or missing input does not destroy the results of a previous run.

    Parameters:
    - file_path: PDF or image file to analyze
    - color_threshold: Maximum Euclidean RGB distance for grouping colors
    - min_percentage: Minimum percentage for a group to count as "main"
    - dpi: Resolution for PDF conversion
    - output_folder: Directory that is wiped and then receives
      ``original.png``, the layer images and ``main_colors.txt``
    """
    print("=" * 60)
    print("COLOR EXTRACTOR - Find Main Colors")
    print("=" * 60)

    # Load image
    print(f"\nInput: {file_path}")
    img = load_image(file_path, dpi)
    # Make sure the pixel data is in memory before anything on disk is removed.
    img.load()

    # Clear output (only once the input is known to be readable)
    clear_output_folder(output_folder)

    # Save original
    original_path = os.path.join(output_folder, 'original.png')
    img.save(original_path)

    # Find main colors
    color_groups = find_main_colors(img, color_threshold, min_percentage)

    if not color_groups:
        print("\nNo main colors found.")
        return

    # Create color layers
    create_color_layers(img, color_groups, color_threshold, output_folder)

    # Save results
    save_results(color_groups, output_folder)

    print(f"\n{'=' * 60}")
    print(f"✓ COMPLETE - Found {len(color_groups)} main colors")
    print(f" Created {len(color_groups)} color layer images")
    print(f"{'=' * 60}")
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Input file (PDF or image): first command-line argument if given,
    # otherwise the default name in the current directory.
    file_path = sys.argv[1] if len(sys.argv) > 1 else "input.pdf"  # or "input.png", "input.jpg", etc.

    if not os.path.exists(file_path):
        print(f"Error: '{file_path}' not found!")
        print("Usage: Place your file as 'input.pdf' or pass its path as the first argument")
    else:
        # Parameters:
        # color_threshold: Distance between colors to group them (0-441)
        #   Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
        #   Examples:
        #     RGB(0,0,0) to RGB(0,0,1) = distance of 1
        #     RGB(0,0,0) to RGB(10,10,10) = distance of ~17
        #     RGB(0,0,0) to RGB(30,30,30) = distance of ~52
        #   Recommended values:
        #     10-20: Very strict - only very similar colors grouped
        #     30-50: Good for most diagrams (RECOMMENDED)
        #     60-100: Loose - more aggressive grouping
        #
        # min_percentage: Minimum % of the non-white pixels to be a "main" color
        #     0.5: Include colors that are at least 0.5%
        #     1.0: Only colors that are at least 1%
        #     0.1: Include even small but significant colors
        #
        # dpi: Resolution for PDF conversion (300 recommended)

        main(
            file_path=file_path,
            color_threshold=120,  # Very loose grouping (beyond the 60-100 "loose" range above)
            min_percentage=3,     # Only groups covering at least 3% count as "main"
            dpi=300,              # PDF resolution
            output_folder='output',
        )
|
||||
Reference in New Issue
Block a user