#!/usr/bin/env python3
"""
PDF Edge Detection with Color Grouping (Preserving Edge Segregation)

Input:  input.pdf
Output: output_sobel/ folder
"""

import os
import shutil
from collections import Counter

import cv2
import numpy as np
from pdf2image import convert_from_path


def clear_output_directory(output_dir):
    """Delete ``output_dir`` (if it exists) and recreate it empty.

    WARNING: everything already inside ``output_dir`` is removed.
    """
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)


def enhance_pastel_colors(image_bgr):
    """Boost the saturation of pastel pixels and darken gray pixels.

    Args:
        image_bgr: 8-bit BGR image.

    Returns:
        A new 8-bit BGR image of the same size.
    """
    hsv = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
    h, s, v = cv2.split(hsv)

    # Both masks are computed from the untouched channels before any edit.
    # Pastel: bright with low, but non-negligible, saturation.
    pastel_mask = (v > 150) & (s < 100) & (s > 10)
    # Gray: (almost) no saturation at all.
    gray_mask = s <= 10

    # Boost saturation for pastels.
    s[pastel_mask] = np.clip(s[pastel_mask] * 2.5, 0, 255)
    # Darken grays so they end up closer to black.
    v[gray_mask] = np.clip(v[gray_mask] * 0.3, 0, 255)

    hsv_enhanced = cv2.merge([h, s, v]).astype(np.uint8)
    return cv2.cvtColor(hsv_enhanced, cv2.COLOR_HSV2BGR)


def _sobel_magnitude(channel):
    """Return the 3x3 Sobel gradient magnitude of a single channel."""
    grad_x = cv2.Sobel(channel, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(channel, cv2.CV_64F, 0, 1, ksize=3)
    return np.sqrt(grad_x ** 2 + grad_y ** 2)


def sobel_edge_detection(image):
    """Apply a Sobel filter to a color-quantized copy of ``image``.

    Args:
        image: 8-bit BGR image.

    Returns:
        An 8-bit BGRA image of the same height/width. Edge pixels carry the
        color of ``image`` with alpha 255; every other pixel is fully zero.
        Edge pixels whose B, G and R are all above 240 (near white) get
        alpha 0.
    """
    # Quantize in HSV so small color variations do not produce edges.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    hsv_quantized = cv2.merge([(h // 5) * 5, (s // 64) * 64, (v // 64) * 64])
    image_quantized = cv2.cvtColor(hsv_quantized, cv2.COLOR_HSV2BGR)

    # Per-channel gradient magnitude, combined with an L2 norm and scaled
    # so the strongest response is ~1 (epsilon avoids division by zero).
    magnitudes = [_sobel_magnitude(ch) for ch in cv2.split(image_quantized)]
    combined = np.sqrt(sum(m ** 2 for m in magnitudes))
    combined = combined / (combined.max() + 1e-8)

    edge_mask = (combined > 0.10).astype(np.uint8) * 255
    # A small morphological close bridges tiny gaps in the edge mask.
    kernel = np.ones((2, 2), np.uint8)
    edge_mask = cv2.morphologyEx(edge_mask, cv2.MORPH_CLOSE, kernel)

    # Build BGRA output that keeps the input colors on edge pixels.
    on_edge = edge_mask > 0
    result = np.zeros((image.shape[0], image.shape[1], 4), dtype=np.uint8)
    result[on_edge, :3] = image[on_edge]
    result[on_edge, 3] = 255

    # Make near-white pixels transparent.
    white_mask = np.all(result[:, :, :3] > 240, axis=2)
    result[white_mask, 3] = 0

    return result


def analyze_edge_colors(edge_img, edge_mask):
    """Analyze whether an edge contains multiple distinct hues.

    Args:
        edge_img: BGRA (or BGR) image; only the first three channels are read.
        edge_mask: boolean mask selecting the pixels of the edge.

    Returns:
        ``(has_multiple_colors, num_colors, dominant_hues)`` where
        ``dominant_hues`` is a list of hue-bin indices. Returns
        ``(False, 0, [])`` when fewer than 10 non-white or fewer than 10
        saturated pixels are available.
    """
    pixels = edge_img[:, :, :3][edge_mask]

    # Ignore near-white pixels.
    non_white = pixels[~np.all(pixels > 240, axis=1)]
    if len(non_white) < 10:
        return False, 0, []

    hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)

    # Ignore low-saturation (gray) pixels; their hue is not meaningful.
    saturated_hsv = hsv[hsv[:, 1] > 30]
    if len(saturated_hsv) < 10:
        return False, 0, []

    # Bin hue in steps of 10 OpenCV hue units. NOTE: 8-bit OpenCV hue is
    # degrees / 2, so each bin spans roughly 20 degrees.
    hue_bins = (saturated_hsv[:, 0] // 10).astype(np.int32)
    unique_hues, counts = np.unique(hue_bins, return_counts=True)

    # Keep only hues covering more than 5% of the saturated pixels.
    significant_hues = unique_hues[counts > len(hue_bins) * 0.05]
    num_colors = len(significant_hues)

    return num_colors > 1, num_colors, significant_hues.tolist()


def split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors):
    """Split an edge into sub-edges by color using K-means on (H, S).

    Args:
        edges_bgra: BGRA edge image.
        edge_mask: boolean mask selecting the pixels of the edge.
        labels: unused; kept for interface compatibility.
        edge_id: unused; kept for interface compatibility.
        num_colors: requested number of clusters.

    Returns:
        List of ``(sub_edge_image, cluster_id)`` tuples. Each image has the
        shape of ``edges_bgra`` and contains only that cluster's pixels.
        If fewer than 10 non-white pixels exist, the whole edge is returned
        as a single entry with cluster id 0.

    Note:
        K-means uses OpenCV's random k-means++ seeding, so cluster ids (and
        borderline assignments) may differ between runs.
    """
    bgr = edges_bgra[:, :, :3]

    y_coords, x_coords = np.where(edge_mask)
    edge_pixels = bgr[edge_mask]

    # Drop near-white pixels before clustering.
    non_white_mask = ~np.all(edge_pixels > 240, axis=1)
    valid_pixels = edge_pixels[non_white_mask]
    valid_y = y_coords[non_white_mask]
    valid_x = x_coords[non_white_mask]

    if len(valid_pixels) < 10:
        # Too few pixels to cluster: return the original edge untouched.
        edge_img = np.zeros_like(edges_bgra)
        edge_img[edge_mask] = edges_bgra[edge_mask]
        return [(edge_img, 0)]

    # Cluster on hue and saturation only, so brightness does not split edges.
    hsv = cv2.cvtColor(valid_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)
    features = hsv[:, :2].astype(np.float32)

    # cv2.kmeans fails when K exceeds the number of samples, so clamp it.
    k = min(num_colors, len(features))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, cluster_labels, _ = cv2.kmeans(
        features, k, None, criteria, 3, cv2.KMEANS_PP_CENTERS
    )
    cluster_labels = cluster_labels.flatten()

    # One separate image per cluster (sub-edges stay segregated).
    sub_edges = []
    for cluster_id in range(k):
        in_cluster = cluster_labels == cluster_id
        cluster_y = valid_y[in_cluster]
        cluster_x = valid_x[in_cluster]

        sub_edge_img = np.zeros_like(edges_bgra)
        sub_edge_img[cluster_y, cluster_x] = edges_bgra[cluster_y, cluster_x]
        sub_edges.append((sub_edge_img, cluster_id))

    return sub_edges


def _mode_color_of_pixels(pixels):
    """Return the most common BGR color in an ``(N, 3)`` uint8 pixel array.

    Near-white pixels are ignored. Low-saturation pixels are ignored too,
    unless that would leave nothing. Returns ``None`` when no non-white
    pixel exists.
    """
    non_white = pixels[~np.all(pixels > 240, axis=1)]
    if len(non_white) == 0:
        return None

    hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)

    # Prefer saturated pixels; fall back to all non-white ones.
    candidates = non_white[hsv[:, 1] > 30]
    if len(candidates) == 0:
        candidates = non_white

    # Pack each BGR triple into one integer so colors can be counted.
    packed = (
        candidates[:, 0].astype(np.int32)
        + candidates[:, 1].astype(np.int32) * 256
        + candidates[:, 2].astype(np.int32) * 65536
    )
    # np.unique keeps memory proportional to the number of pixels, whereas
    # np.bincount would allocate up to 2**24 counters. Ties resolve to the
    # smallest packed value in both approaches.
    values, counts = np.unique(packed, return_counts=True)
    mode_int = int(values[counts.argmax()])

    return np.array(
        [mode_int % 256, (mode_int // 256) % 256, (mode_int // 65536) % 256],
        dtype=np.uint8,
    )


def get_edge_mode_color(edge_img, edge_mask):
    """Get the mode (most common) color of an edge.

    Args:
        edge_img: BGRA (or BGR) image; only the first three channels are read.
        edge_mask: boolean mask selecting the pixels of the edge.

    Returns:
        The mode color as a uint8 array ``[B, G, R]``, or ``None`` if the
        edge has no non-white pixels.
    """
    return _mode_color_of_pixels(edge_img[:, :, :3][edge_mask])


def process_and_group_edges(edges_bgra, color_threshold=30):
    """Split multi-color edges, then group all edges by similar mode color.

    Edges remain separate (segregated) even within groups.

    Args:
        edges_bgra: BGRA edge image; alpha > 0 marks edge pixels.
        color_threshold: maximum Euclidean BGR distance between the group's
            seed color and an edge's mode color for the edge to join.

    Returns:
        List of ``(group_image, mode_color, edge_count)`` tuples, where
        ``mode_color`` is the mode color of the edge that seeded the group.
    """
    alpha = edges_bgra[:, :, 3]

    # Each connected component of the alpha channel is one edge.
    num_labels, labels = cv2.connectedComponents(alpha)
    print(f" Found {num_labels - 1} edges")

    if num_labels <= 1:
        return []

    # Keep only (rows, cols) pixel coordinates per edge rather than one
    # full-size image per edge, so memory stays proportional to the page.
    edge_coords = []
    for edge_id in range(1, num_labels):
        edge_mask = labels == edge_id
        if not np.any(edge_mask):
            continue

        has_multiple, num_colors, _ = analyze_edge_colors(edges_bgra, edge_mask)

        if has_multiple:
            print(f" Edge {edge_id}: {num_colors} colors detected, splitting...")
            sub_edges = split_edge_by_color(
                edges_bgra, edge_mask, labels, edge_id, num_colors
            )
            for sub_edge_img, _ in sub_edges:
                edge_coords.append(np.nonzero(sub_edge_img[:, :, 3] > 0))
        else:
            # Keep as a single edge.
            edge_coords.append(np.nonzero(edge_mask))

    print(f" Total edges after splitting: {len(edge_coords)}")

    # Mode color of every edge (None for edges without usable pixels).
    edge_colors = [
        _mode_color_of_pixels(edges_bgra[ys, xs, :3]) for ys, xs in edge_coords
    ]

    # Greedy grouping: each not-yet-used edge seeds a group and absorbs
    # every remaining edge whose mode color is within the threshold.
    groups = []
    used_indices = set()

    for i, mode_color in enumerate(edge_colors):
        if i in used_indices or mode_color is None:
            continue

        group_indices = [i]
        used_indices.add(i)

        for j, other_color in enumerate(edge_colors):
            if j in used_indices or other_color is None:
                continue

            distance = np.linalg.norm(
                mode_color.astype(float) - other_color.astype(float)
            )
            if distance <= color_threshold:
                group_indices.append(j)
                used_indices.add(j)

        # Paint all member edges into one image (edges remain separate).
        group_img = np.zeros_like(edges_bgra)
        for idx in group_indices:
            ys, xs = edge_coords[idx]
            group_img[ys, xs] = edges_bgra[ys, xs]

        groups.append((group_img, mode_color, len(group_indices)))

    print(f" Grouped into {len(groups)} color groups")

    return groups


def _write_image(path, image):
    """Write ``image`` to ``path``; raise ``OSError`` if OpenCV reports failure."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Failed to write image: {path}")


def process_pdf(pdf_path, output_dir, dpi=200):
    """Render every page of a PDF and save its edge / color-group images.

    ``output_dir`` is wiped and recreated first. For each page the original,
    enhanced, edge and per-group images are written as PNG files.

    Args:
        pdf_path: path of the PDF to process.
        output_dir: directory that receives the PNG files.
        dpi: rendering resolution passed to ``convert_from_path``.

    Raises:
        OSError: if an output image cannot be written.
    """
    clear_output_directory(output_dir)

    print(f"Processing PDF: {pdf_path}")
    print(f"Converting at {dpi} DPI...\n")

    images = convert_from_path(pdf_path, dpi=dpi)
    print(f"Total pages: {len(images)}\n")

    for page_num, pil_image in enumerate(images, start=1):
        print(f"Page {page_num}/{len(images)}...")

        # PIL delivers RGB; OpenCV works in BGR.
        image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

        print(" - Enhancing pastel colors...")
        enhanced_image = enhance_pastel_colors(image)

        print(" - Detecting edges...")
        edges = sobel_edge_detection(enhanced_image)

        print(" - Processing and grouping edges by color...")
        groups = process_and_group_edges(edges, color_threshold=30)

        base = f"page{page_num:03d}"
        _write_image(os.path.join(output_dir, f"{base}_original.png"), image)
        _write_image(os.path.join(output_dir, f"{base}_enhanced.png"), enhanced_image)
        _write_image(os.path.join(output_dir, f"{base}_edges.png"), edges)

        for group_idx, (group_img, mode_color, edge_count) in enumerate(groups, start=1):
            path = os.path.join(output_dir, f"{base}_group{group_idx}.png")
            _write_image(path, group_img)
            # Plain ints keep the output readable on NumPy 2, whose scalar
            # repr would otherwise print np.uint8(...) wrappers.
            bgr = tuple(int(c) for c in mode_color)
            print(f" Group {group_idx}: {edge_count} edges, mode color (BGR): {bgr}")

        print(f" - Saved {len(groups)} group images\n")

    print("Complete!")


def main():
    """Process ``input.pdf`` into ``output_sobel``; return a process exit code."""
    pdf_path = "input.pdf"
    output_dir = "output_sobel"

    if not os.path.exists(pdf_path):
        print(f"Error: '{pdf_path}' not found!")
        return 1

    # Top-level boundary: report any failure and turn it into exit code 1.
    try:
        process_pdf(pdf_path, output_dir)
        return 0
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(main())