Skip to content

Commit

Permalink
Merge pull request #6 from Stealeristaken/deneme
Browse files Browse the repository at this point in the history
model option for image embeddings
  • Loading branch information
cobanov authored Mar 14, 2024
2 parents fbd37f5 + 4203446 commit 3716ce2
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
11 changes: 8 additions & 3 deletions tasnif/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@
from .logger import info


def get_embeddings(use_gpu=False, images=None):
def get_embeddings(use_gpu=False, images=None, model='resnet-18'):
"""
This Python function initializes an Img2Vec object, runs it on either GPU or CPU, and retrieves
image embeddings.
:param use_gpu: The `use_gpu` parameter is a boolean that specifies whether to use GPU or CPU.
:param images: The `images` parameter is a list of image paths to be used for generating embeddings.
:param model: The `model` parameter is a string that specifies the model to use for generating embeddings. Default is 'resnet-18'.
For available models, see https://github.com/christiansafka/img2vec
:return: The function `get_embeddings` returns the embeddings of the images as np.ndarray.
"""

info(f"Img2Vec is running on {'GPU' if use_gpu else 'CPU'}...")
img2vec = Img2Vec(cuda=use_gpu)

img2vec = Img2Vec(cuda=use_gpu, model=model)
print(f"Using model: {model}")
embeddings = img2vec.get_vec(images, tensor=False)
return embeddings

Expand Down
6 changes: 4 additions & 2 deletions tasnif/tasnif.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,23 @@ def read(self, folder_path):
self.image_paths = read_images_from_directory(folder_path)
self.images = read_with_pil(self.image_paths)

def calculate(self, pca=True, iter=10):
def calculate(self, pca=True, iter=10, model="resnet-18"):
"""
The function calculates embeddings, optionally performs PCA (when `pca` is True), and applies K-means
clustering to the embeddings. It raises a ValueError if no images have been read; call `read` first.
:param pca: The `pca` parameter is a boolean that specifies whether to perform PCA or not. Default is True.
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10.
:param model: The `model` parameter is a string that specifies the model to use for generating embeddings. Default is 'resnet-18'.
For available models, see https://github.com/christiansafka/img2vec
"""

if not self.images:
raise ValueError(
"The images list can not be empty. Please call the read method before calculating."
)

self.embeddings = get_embeddings(use_gpu=self.use_gpu, images=self.images)
self.embeddings = get_embeddings(use_gpu=self.use_gpu, images=self.images, model=model)
if pca:
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
self.centroid, self.labels, self.counts = calculate_kmeans(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def test_get_embeddings():
image_paths = read_images_from_directory("tests/test_images")
images = read_with_pil(image_paths)
embeddings = get_embeddings(images=images, use_gpu=False)
embeddings = get_embeddings(images=images, use_gpu=False, model="resnet-18")
assert embeddings is not None, "Embeddings were not generated."


Expand All @@ -20,7 +20,7 @@ def test_calculate_pca():
def test_calculate_kmeans():
pca_embeddings = np.random.rand(10, 16)
num_classes = 2
centroid, labels, counts = calculate_kmeans(pca_embeddings, num_classes)
centroid, labels, counts = calculate_kmeans(pca_embeddings, num_classes, iter=10)
assert (
len(set(labels)) == num_classes
), "K-means did not cluster into the expected number of classes."

0 comments on commit 3716ce2

Please sign in to comment.