From 498e8425235fbeeb32a4504e54cc1f4d9acde266 Mon Sep 17 00:00:00 2001
From: jwradhe <jimmy@wradhe.com>
Date: Sat, 26 Oct 2024 20:43:13 +0200
Subject: [PATCH] Uppdatera classer

---
 main.py | 81 ++++++++++++---------------------------------------------
 1 file changed, 16 insertions(+), 65 deletions(-)

diff --git a/main.py b/main.py
index a04636b..b72d240 100644
--- a/main.py
+++ b/main.py
@@ -6,28 +6,19 @@ from sklearn.preprocessing import OneHotEncoder
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 
+
 class LoadData:
 
     def __init__(self):
-        self.data_file = 'data_movies_series.csv'
         self.data = None
         self.loaded_datasets = []
 
     def check_data(self):
-        if os.path.isfile(self.data_file):
-            self.load_data()
-            return self.data
-        else:
-            self.create_data() 
-            if self.data is not None and not self.data.empty:
-                self.clean_data()
-                self.save_data()
-                num_rows = self.data.shape[0]
-                print(f'{num_rows} titles loaded successfully.')
-                return self.data
-            else:
-                print("Error: No data was created. Please check the dataset files.")
-                return None 
+        self.create_data()
+        self.clean_data()
+        num_rows = self.data.shape[0]
+        print(f'{num_rows} titles loaded successfully.')
+        return self.data
 
     def clean_text(self, text):
         if isinstance(text, str):
@@ -36,15 +27,8 @@ class LoadData:
             cleaned = cleaned.replace('"', '')
             return cleaned.strip()
         return '' 
-  
-    def clean_data(self):
-        string_columns = self.data.select_dtypes(include=['object'])
-        self.data[string_columns.columns] = string_columns.apply(lambda col: col.map(self.clean_text, na_action='ignore'))
-        self.data = self.data[~self.data['title'].str.strip().isin(['', ':'])]
-        print(f'Data cleaned successfully.')
 
     def load_dataset(self, dataset_path, stream):
-        print(f'dataset/{dataset_path}')
         try:
             df = pd.read_csv(f'dataset/{dataset_path}')
             df['stream'] = stream
@@ -76,16 +60,13 @@ class LoadData:
         df_all = df_all.infer_objects(copy=False)
         self.data = df_all
 
-        print(f'Data from {", ".join(self.loaded_datasets)} loaded successfully.')
+        print(f'Data from {", ".join(self.loaded_datasets)} imported.')
 
-    def save_data(self):
-        self.data.to_csv(self.data_file, index=False)
-        print(f'Data saved to {self.data_file} successfully.')
-
-    def load_data(self):
-        self.data = pd.read_csv(self.data_file)
-        num_rows = self.data.shape[0]
-        print(f'{num_rows} titles loaded successfully.')
+    def clean_data(self):
+        string_columns = self.data.select_dtypes(include=['object'])
+        self.data[string_columns.columns] = string_columns.apply(lambda col: col.map(self.clean_text, na_action='ignore'))
+        self.data = self.data[~self.data['title'].str.strip().isin(['', ':'])]
+        print(f'Data cleaned')
 
 
 class UserData:
@@ -98,46 +79,16 @@ class UserData:
         return self.user_data.lower()
 
 
-class Search:
-
-    def __init__(self, data):
-        self.data = data
-        self.preprocess()
-
-    def preprocess(self):
-        self.description_vectorizer = TfidfVectorizer(stop_words='english')
-        self.description_matrix = self.description_vectorizer.fit_transform(self.data['description'].fillna(''))
-        
-        self.onehot_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
-        genres_type_matrix = self.onehot_encoder.fit_transform(self.data[['genres', 'type']].fillna(''))
-
-        self.feature_matrix = np.hstack([
-            self.description_matrix.toarray(),
-            genres_type_matrix,
-            self.data[['release_year']].fillna(0).to_numpy()
-        ])
-
-    def search(self, query, top_n=20):
-        query_vec = self.description_vectorizer.transform([query])
-        
-        if hasattr(query_vec, "toarray"):
-            query_vec = query_vec.toarray()
-        
-        similarity = cosine_similarity(query_vec, self.description_matrix).flatten()
-
-        top_indices = similarity.argsort()[-top_n:][::-1]
-        return self.data.iloc[top_indices][['title', 'genres', 'type', 'release_year', 'stream','description']]
-
-
 class Recommendations:
 
     def __init__(self):
         self.result = None
 
     def get_recommendations(self, user_data, title_data):
-        if title_data is not None and not title_data.empty:     
-            search_data = Search(title_data)
-            self.results = search_data.search(user_data)
+        if title_data is not None and not title_data.empty: 
+
+            self.results = "Här ska de komma rekommendationer"
+
             print(self.results)
         else:
             print("No data available to search.")