Files
AI-coodex-rekog-image-labeling/label/cores/sobel.py
2026-05-14 14:07:04 -03:00

376 lines
11 KiB
Python

#!/usr/bin/env python3
"""
PDF Edge Detection with Color Grouping (Preserving Edge Segregation)
Input: input.pdf
Output: output_sobel/ folder
"""
import cv2
import numpy as np
from pdf2image import convert_from_path
import os
import shutil
from collections import Counter
def clear_output_directory(output_dir):
    """
    Reset ``output_dir`` to an empty directory.

    Whatever currently occupies the path is deleted first: a directory tree
    is removed recursively, while a regular file or a symbolic link is
    unlinked. The directory is then (re)created, including missing parents.

    Args:
        output_dir: Path of the directory to wipe and recreate.
    """
    # Check islink() before isdir(): a link to a directory satisfies both,
    # but shutil.rmtree() refuses symlinks, and a dangling link would make
    # os.makedirs() fail even though os.path.exists() reports False.
    if os.path.islink(output_dir) or os.path.isfile(output_dir):
        os.remove(output_dir)
    elif os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)
def enhance_pastel_colors(image_bgr):
    """
    Make pastel tones more vivid and push near-gray pixels toward black.

    Working in HSV: pixels with V > 150 and 10 < S < 100 get their
    saturation multiplied by 2.5, and pixels with S <= 10 get their value
    multiplied by 0.3. Both results are clipped to [0, 255].

    Args:
        image_bgr: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        The adjusted image, converted back to BGR as ``uint8``.
    """
    hsv_float = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
    hue, sat, val = cv2.split(hsv_float)

    # Both selections are derived from the untouched channels.
    is_gray = sat <= 10
    is_pastel = (sat > 10) & (sat < 100) & (val > 150)

    sat = np.where(is_pastel, np.clip(sat * 2.5, 0, 255), sat)
    val = np.where(is_gray, np.clip(val * 0.3, 0, 255), val)

    adjusted_hsv = cv2.merge([hue, sat, val]).astype(np.uint8)
    return cv2.cvtColor(adjusted_hsv, cv2.COLOR_HSV2BGR)
def sobel_edge_detection(image):
    """
    Detect edges with a per-channel Sobel filter on a color-quantized copy.

    The image is quantized in HSV (hue in steps of 5, saturation and value
    in steps of 64), the Sobel gradient magnitude of each BGR channel is
    combined and normalized by its maximum, and responses above 0.10 are
    kept. The mask is then closed with a 2x2 kernel.

    Args:
        image: BGR image; its pixels supply the colors of the output.

    Returns:
        BGRA ``uint8`` image with the same height and width. Edge pixels
        carry the color of ``image`` with alpha 255, except those whose
        three color channels all exceed 240, which get alpha 0. Everything
        else is zero.
    """
    def gradient_magnitude(channel):
        # Euclidean norm of the horizontal and vertical 3x3 Sobel responses.
        grad_x = cv2.Sobel(channel, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(channel, cv2.CV_64F, 0, 1, ksize=3)
        return np.sqrt(grad_x**2 + grad_y**2)

    # Coarsen the colors so small shade variations do not register as edges.
    hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
    coarse_hsv = cv2.merge([(hue // 5) * 5, (sat // 64) * 64, (val // 64) * 64])
    coarse_bgr = cv2.cvtColor(coarse_hsv, cv2.COLOR_HSV2BGR)

    mag_b, mag_g, mag_r = (gradient_magnitude(c) for c in cv2.split(coarse_bgr))
    strength = np.sqrt(mag_b**2 + mag_g**2 + mag_r**2)
    # The epsilon keeps a completely flat image from dividing by zero.
    strength = strength / (strength.max() + 1e-8)

    edge_mask = (strength > 0.10).astype(np.uint8) * 255
    edge_mask = cv2.morphologyEx(
        edge_mask, cv2.MORPH_CLOSE, np.ones((2, 2), np.uint8)
    )
    on_edge = edge_mask > 0

    # Transparent canvas; edge pixels take the input image's color.
    height, width = image.shape[0], image.shape[1]
    result = np.zeros((height, width, 4), dtype=np.uint8)
    result[on_edge, :3] = image[on_edge]
    result[on_edge, 3] = 255

    # Hide near-white edge pixels by clearing their alpha.
    near_white = np.all(result[:, :, :3] > 240, axis=2)
    result[near_white, 3] = 0
    return result
def analyze_edge_colors(edge_img, edge_mask):
    """
    Decide whether the masked pixels contain more than one distinct hue.

    Near-white pixels (all of B, G, R above 240) and weakly saturated
    pixels (S <= 30) are ignored. The remaining hues are bucketed in steps
    of 10 OpenCV hue units, and a bucket counts as significant when it holds
    more than 5% of the remaining pixels.

    NOTE: the buckets are linear, not circular, so hues on either side of
    the 0/179 wrap-around fall into different buckets.

    Args:
        edge_img: Image whose first three channels are B, G, R.
        edge_mask: Index (the caller passes a boolean mask) selecting the
            pixels to inspect.

    Returns:
        ``(has_multiple_colors, num_colors, dominant_hues)``, where
        ``dominant_hues`` lists the significant bucket indices. The result
        is ``(False, 0, [])`` when fewer than 10 usable pixels remain after
        either filter.
    """
    candidates = edge_img[:, :, :3][edge_mask]

    # Keep a pixel unless every channel is near white.
    candidates = candidates[np.any(candidates <= 240, axis=1)]
    if len(candidates) < 10:
        return False, 0, []

    as_hsv = cv2.cvtColor(
        candidates.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV
    ).reshape(-1, 3)

    # Grayish pixels have no meaningful hue.
    vivid = as_hsv[as_hsv[:, 1] > 30]
    if len(vivid) < 10:
        return False, 0, []

    buckets = (vivid[:, 0] // 10).astype(np.int32)
    bucket_ids, tallies = np.unique(buckets, return_counts=True)

    dominant = bucket_ids[tallies > (len(buckets) * 0.05)]
    how_many = len(dominant)
    return how_many > 1, how_many, dominant.tolist()
def split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors):
    """
    Split one edge into per-color sub-edges with K-means on hue/saturation.

    Near-white pixels (all of B, G, R above 240) are left out of the
    clustering and therefore out of every sub-edge.

    Args:
        edges_bgra: BGRA image holding the edge pixels.
        edge_mask: Mask with the same height and width as ``edges_bgra``
            selecting the pixels of the edge to split.
        labels: Unused; kept so existing callers keep working.
        edge_id: Unused; kept so existing callers keep working.
        num_colors: Requested number of clusters. It is capped at the
            number of usable pixels, because ``cv2.kmeans`` cannot form
            more clusters than it has samples.

    Returns:
        List of ``(sub_edge_image, cluster_id)`` tuples. Each image has the
        shape of ``edges_bgra`` and contains only that cluster's pixels.
        When fewer than 10 usable pixels exist, or no cluster can be
        formed, the list holds the whole edge as a single entry with
        cluster id 0.
    """
    def whole_edge():
        # Fallback: hand the edge back unsplit.
        edge_img = np.zeros_like(edges_bgra)
        edge_img[edge_mask] = edges_bgra[edge_mask]
        return [(edge_img, 0)]

    bgr = edges_bgra[:, :, :3]

    # Coordinates and colors of the edge pixels, in matching order.
    y_coords, x_coords = np.where(edge_mask)
    edge_pixels = bgr[edge_mask]

    non_white_mask = ~np.all(edge_pixels > 240, axis=1)
    valid_pixels = edge_pixels[non_white_mask]
    valid_y = y_coords[non_white_mask]
    valid_x = x_coords[non_white_mask]
    if len(valid_pixels) < 10:
        return whole_edge()

    # cv2.kmeans raises when K exceeds the sample count or is below 1.
    cluster_count = min(int(num_colors), len(valid_pixels))
    if cluster_count < 1:
        return whole_edge()

    # Cluster on hue and saturation only, so brightness changes along a
    # stroke do not split it.
    hsv = cv2.cvtColor(
        valid_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV
    ).reshape(-1, 3)
    features = hsv[:, :2].astype(np.float32)

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, cluster_labels, _ = cv2.kmeans(
        features, cluster_count, None, criteria, 3, cv2.KMEANS_PP_CENTERS
    )
    cluster_labels = cluster_labels.flatten()

    # One separate image per cluster, so the sub-edges stay segregated.
    sub_edges = []
    for cluster_id in range(cluster_count):
        members = cluster_labels == cluster_id
        cluster_y = valid_y[members]
        cluster_x = valid_x[members]
        sub_edge_img = np.zeros_like(edges_bgra)
        sub_edge_img[cluster_y, cluster_x] = edges_bgra[cluster_y, cluster_x]
        sub_edges.append((sub_edge_img, cluster_id))
    return sub_edges
def get_edge_mode_color(edge_img, edge_mask):
    """
    Return the most frequent color among the masked pixels.

    Near-white pixels (all of B, G, R above 240) are ignored. Pixels with
    saturation above 30 are preferred; if none exist, all non-white pixels
    are used instead.

    Args:
        edge_img: Image whose first three channels are B, G, R.
        edge_mask: Index (the caller passes a boolean mask) selecting the
            pixels to inspect.

    Returns:
        ``uint8`` array ``[B, G, R]`` of the mode color, or ``None`` when
        no non-white pixel is selected. Ties go to the color with the
        smallest packed value ``B + 256*G + 65536*R``.
    """
    bgr = edge_img[:, :, :3]
    pixels = bgr[edge_mask]

    non_white = pixels[~np.all(pixels > 240, axis=1)]
    if len(non_white) == 0:
        return None

    hsv = cv2.cvtColor(
        non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV
    ).reshape(-1, 3)

    # Prefer saturated pixels; fall back to everything if the edge is gray.
    saturated_pixels = non_white[hsv[:, 1] > 30]
    if len(saturated_pixels) == 0:
        saturated_pixels = non_white

    # Pack each color into one integer so it can be counted as a unit.
    pixel_ints = (saturated_pixels[:, 0].astype(np.int32) +
                  saturated_pixels[:, 1].astype(np.int32) * 256 +
                  saturated_pixels[:, 2].astype(np.int32) * 65536)

    # np.unique only counts the colors actually present. np.bincount would
    # allocate one counter per value up to the largest packed color (as many
    # as 2**24 entries) on every call. The unique values come back sorted,
    # so argmax keeps the same tie-break: the smallest packed value.
    distinct, occurrences = np.unique(pixel_ints, return_counts=True)
    mode_int = int(distinct[occurrences.argmax()])

    return np.array([
        mode_int % 256,
        (mode_int // 256) % 256,
        (mode_int // 65536) % 256,
    ], dtype=np.uint8)
def process_and_group_edges(edges_bgra, color_threshold=30):
    """
    Split multi-color edges, then group all edges by similar mode color.

    Edges are the connected components of the alpha channel. Each edge is
    handled inside its bounding box rather than on a full-frame copy, so
    time and memory scale with the size of the edges instead of
    ``number_of_edges * image_area``.

    Grouping is greedy: each still-ungrouped edge starts a group and pulls
    in every remaining edge whose mode color lies within
    ``color_threshold`` (Euclidean distance in BGR) of it. Edges without a
    mode color are left out.

    Args:
        edges_bgra: BGRA edge image; alpha > 0 marks edge pixels.
        color_threshold: Maximum BGR distance for two edges to share a
            group.

    Returns:
        List of ``(group_image, mode_color, edge_count)`` tuples. Each
        ``group_image`` has the shape of ``edges_bgra`` and contains only
        that group's pixels; ``mode_color`` is the mode color of the edge
        that started the group.
    """
    alpha = edges_bgra[:, :, 3]

    # The stats variant also yields each component's bounding box.
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(alpha)
    print(f" Found {num_labels - 1} edges")
    if num_labels <= 1:
        return []

    # Each entry is (window, patch): `window` locates the bounding box in
    # the full image and `patch` is a BGRA crop holding one (sub-)edge.
    edge_patches = []
    for edge_id in range(1, num_labels):
        left = int(stats[edge_id, cv2.CC_STAT_LEFT])
        top = int(stats[edge_id, cv2.CC_STAT_TOP])
        width = int(stats[edge_id, cv2.CC_STAT_WIDTH])
        height = int(stats[edge_id, cv2.CC_STAT_HEIGHT])
        window = (slice(top, top + height), slice(left, left + width))

        region = edges_bgra[window]
        region_labels = labels[window]
        edge_mask = region_labels == edge_id
        if not np.any(edge_mask):
            continue

        has_multiple, num_colors, _ = analyze_edge_colors(region, edge_mask)
        if has_multiple:
            print(f" Edge {edge_id}: {num_colors} colors detected, splitting...")
            sub_edges = split_edge_by_color(
                region, edge_mask, region_labels, edge_id, num_colors
            )
            edge_patches.extend((window, sub_img) for sub_img, _ in sub_edges)
        else:
            # Copy only this component; neighbours sharing the bounding box
            # must not leak into the patch.
            patch = np.zeros_like(region)
            patch[edge_mask] = region[edge_mask]
            edge_patches.append((window, patch))
    print(f" Total edges after splitting: {len(edge_patches)}")

    edge_colors = [
        get_edge_mode_color(patch, patch[:, :, 3] > 0)
        for _, patch in edge_patches
    ]

    groups = []
    used_indices = set()
    for i, mode_color in enumerate(edge_colors):
        if i in used_indices or mode_color is None:
            continue
        group_indices = [i]
        used_indices.add(i)

        # Pull in every still-free edge close enough to this seed color.
        for j, other_color in enumerate(edge_colors):
            if j in used_indices or other_color is None:
                continue
            distance = np.linalg.norm(
                mode_color.astype(float) - other_color.astype(float)
            )
            if distance <= color_threshold:
                group_indices.append(j)
                used_indices.add(j)

        # Paste each member back at its original position. The edges stay
        # separate because only their own pixels are written.
        group_img = np.zeros_like(edges_bgra)
        for idx in group_indices:
            window, patch = edge_patches[idx]
            target = group_img[window]  # view into group_img
            visible = patch[:, :, 3] > 0
            target[visible] = patch[visible]
        groups.append((group_img, mode_color, len(group_indices)))

    print(f" Grouped into {len(groups)} color groups")
    return groups
def process_pdf(pdf_path, output_dir, dpi=200):
    """
    Run the edge pipeline on every page of a PDF and write PNG results.

    ``output_dir`` is cleared first. For each page the function writes
    ``pageNNN_original.png``, ``pageNNN_enhanced.png``,
    ``pageNNN_edges.png`` and one ``pageNNN_groupK.png`` per color group.

    Args:
        pdf_path: Path of the PDF to process.
        output_dir: Directory that receives the PNG files; existing
            content is removed.
        dpi: Rasterization resolution passed to ``convert_from_path``.

    Raises:
        OSError: If an output image cannot be written.
    """
    def save_png(filename, pixels):
        # cv2.imwrite signals failure by returning False, not by raising.
        path = os.path.join(output_dir, filename)
        if not cv2.imwrite(path, pixels):
            raise OSError(f"Could not write image: {path}")

    clear_output_directory(output_dir)
    print(f"Processing PDF: {pdf_path}")
    print(f"Converting at {dpi} DPI...\n")
    images = convert_from_path(pdf_path, dpi=dpi)
    print(f"Total pages: {len(images)}\n")

    for page_num, pil_image in enumerate(images, start=1):
        print(f"Page {page_num}/{len(images)}...")

        # The page array is treated as RGB; OpenCV works in BGR.
        image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

        print(" - Enhancing pastel colors...")
        enhanced_image = enhance_pastel_colors(image)

        print(" - Detecting edges...")
        edges = sobel_edge_detection(enhanced_image)

        print(" - Processing and grouping edges by color...")
        groups = process_and_group_edges(edges, color_threshold=30)

        base = f"page{page_num:03d}"
        save_png(f"{base}_original.png", image)
        save_png(f"{base}_enhanced.png", enhanced_image)
        save_png(f"{base}_edges.png", edges)

        for group_idx, (group_img, mode_color, edge_count) in enumerate(groups, start=1):
            save_png(f"{base}_group{group_idx}.png", group_img)
            # Plain ints keep the log readable: NumPy 2 would otherwise
            # print each channel as "np.uint8(...)".
            color_bgr = tuple(int(channel) for channel in mode_color)
            print(f" Group {group_idx}: {edge_count} edges, mode color (BGR): {color_bgr}")
        print(f" - Saved {len(groups)} group images\n")
    print("Complete!")
def main():
    """
    Process ``input.pdf`` from the working directory into ``output_sobel``.

    Returns:
        0 on success; 1 when the input file is missing or processing
        raised an exception (the traceback is printed).
    """
    source_pdf = "input.pdf"
    destination = "output_sobel"

    if not os.path.exists(source_pdf):
        print(f"Error: '{source_pdf}' not found!")
        return 1

    try:
        process_pdf(source_pdf, destination)
    except Exception as exc:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"Error: {exc}")
        import traceback
        traceback.print_exc()
        return 1
    return 0
if __name__ == "__main__":
    # The `exit` helper is injected by the `site` module for interactive
    # sessions and is absent under `python -S`; raising SystemExit always
    # works and passes main()'s return value on as the exit status.
    raise SystemExit(main())