diff --git a/README.md b/README.md index 6b1b38a..7a8c137 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,8 @@ I will use a dataset from TMBD https://www.kaggle.com/datasets/asaniczka/full-tmdb-tv-shows-dataset-2023-150k-shows ### Model: -I will use NearestNeighbors (NN) alhorithm together with K-NearestNeighbors alhorithm. +I must first preprocess the data with vectorization so that I can use it in the NearestNeighbors (NN) algorithm. +I will use the NearestNeighbors (NN) output as input to the K-NearestNeighbors algorithm. ### Features: 1. Load data from dataset and preprocessing. diff --git a/trainmodel.py b/trainmodel.py index 4e98ce5..667ee46 100644 --- a/trainmodel.py +++ b/trainmodel.py @@ -34,7 +34,7 @@ class TrainModel: # Preprocess title data preproccessed_data = self.preprocess_title_data() - # Train the NN model + # Train the NearestNeighbors model self.model.fit(preproccessed_data) stop = time.time() @@ -51,7 +51,7 @@ class TrainModel: # Preprocess target data target_vector = self.preprocess_target_data(target_row) - # Get nearest neighbors + # Use NearestNeighbors model as input to K-nearest neighbors distances, indices = self.model.kneighbors(target_vector, n_neighbors=num_recommendations) recommendations = self.title_data.iloc[indices[0]].copy() recommendations['distance'] = distances[0]