# Source code for meipi.indexing.embedding

"""Helpers for batching images and generating image embeddings.

This module provides small, model-agnostic utilities used by indexing jobs:

- pre-processing image inputs into model-compatible batches
- running batched forward passes on a selected device
- collecting pooled vectors as ``numpy.ndarray``
"""

from typing import List
from itertools import batched
import gc
from tqdm.auto import tqdm
import numpy as np
from torch.utils.data import default_collate
import torch
from transformers import AutoImageProcessor, BatchFeature, BaseImageProcessor
from transformers.image_utils import ImageInput


def check_cuda_memory() -> None:
    """Print every garbage-collector-tracked tensor that lives on a CUDA device.

    Debugging aid for memory usage: walks ``gc.get_objects()`` and, for each
    ``torch.Tensor`` whose device type is ``"cuda"``, prints its Python type,
    its size and its device.
    """
    for candidate in gc.get_objects():
        # Skip anything that is not a tensor before touching ``.device``.
        if not isinstance(candidate, torch.Tensor):
            continue
        if candidate.device.type != "cuda":
            continue
        print(type(candidate), candidate.size(), candidate.device)


def create_image_batches(
    images: ImageInput, model_name: str, batch_size: int
) -> List[BatchFeature]:
    """Pre-process images and group them into fixed-size model inputs.

    The images are run through the ``AutoImageProcessor`` resolved from
    ``model_name`` (fast variant requested), and the resulting
    ``pixel_values`` entries are split into consecutive groups of
    ``batch_size`` and collated with ``default_collate``.

    Args:
        images: Raw image inputs accepted by ``transformers``.
        model_name: HuggingFace model id used to resolve the processor.
        batch_size: Maximum number of samples per batch; the final batch
            holds whatever remains and may be smaller.

    Returns:
        One ``BatchFeature`` per group, each with a single ``pixel_values``
        entry holding the collated samples, in input order.
    """
    processor: BaseImageProcessor = AutoImageProcessor.from_pretrained(
        model_name, use_fast=True
    )
    pixel_values = processor(images)["pixel_values"]

    feature_batches: List[BatchFeature] = []
    for chunk in batched(pixel_values, batch_size):
        # ``batched`` yields tuples; ``default_collate`` is given a list.
        collated = default_collate(list(chunk))
        feature_batches.append(BatchFeature(data={"pixel_values": collated}))
    return feature_batches


@torch.no_grad()
def generate_image_embeddings(
    model, inp_batches: List[BatchFeature], device="cuda"
) -> np.ndarray:
    """Generate pooled embeddings for all batches and return one stacked array.

    The model is moved to ``device`` for inference and moved back to the
    device it was on before the call, even when a forward pass raises.
    Each batch is moved to ``device`` for its forward pass and back to the
    CPU afterwards.

    Args:
        model: Model exposing ``.device`` and ``.to()``, callable with the
            batch contents as keyword arguments, whose output has a
            ``pooler_output`` tensor.
        inp_batches: Model-ready batches, processed in order.
        device: Device on which the forward passes run.

    Returns:
        A 2-D array with one row per input sample, in input order. Each
        sample's pooled output is flattened into its row, so trailing
        singleton axes (e.g. ``(B, C, 1, 1)``) collapse to ``(B, C)``.

    Raises:
        ValueError: If ``inp_batches`` is empty (raised by ``np.vstack``).
    """
    original_device = model.device
    model.to(device)
    embeddings = []
    try:
        for batch in tqdm(inp_batches):
            # The return value of ``.to()`` is discarded, so this relies on
            # the batch object moving its tensors in place.
            batch.to(device)
            try:
                out = model(
                    **batch, output_hidden_states=True, output_attentions=False
                )
                pooled = out.pooler_output.detach().cpu().numpy()
                del out
            finally:
                # Hand the caller's batch back on the CPU even on failure.
                batch.to("cpu", non_blocking=True)
            torch.cuda.empty_cache()

            # Keep the batch axis explicit instead of ``squeeze()``: a batch
            # of one sample or an embedding of width one must still yield
            # exactly one row per sample.
            if pooled.ndim < 2:
                pooled = pooled.reshape(1, -1)
            else:
                pooled = pooled.reshape(pooled.shape[0], -1)
            embeddings.append(pooled)
    finally:
        # Always restore the model's original placement.
        model.to(original_device)
    return np.vstack(embeddings)