import folium
import random
import pandas as pd
import numpy as np
from folium.plugins import HeatMap
from ucimlrepo import fetch_ucirepo
from datetime import datetime
import math
real_estate_valuation = fetch_ucirepo(id=477)
df = real_estate_valuation.data.original.copy()
def date_transformation(date):
    # The digits after the decimal point encode the month as thousandths of a
    # year, so scale them back to the 1-12 range
    year = int(date.split('.')[0])
    # Pad the fractional part so '2013.5' is read as .500 rather than .005
    fraction = int(date.split('.')[1].ljust(3, '0'))
    converted_month = math.floor(12 / 1000 * fraction)
    if converted_month == 0:
        converted_month = 1
    return datetime(year=year, month=converted_month, day=1)
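# For illustration: 12/1000 * 250 = 3, so date_transformation('2013.250')
# returns datetime(2013, 3, 1)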
df['X1 transaction date'] = df['X1 transaction date'].apply(lambda date: date_transformation(str(date)))
df.drop(columns=['No'], inplace=True)
df['X1 transaction date'] = pd.to_datetime(df['X1 transaction date'])
df.sort_values(by='X1 transaction date', axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)
prices = df['Y house price of unit area'].to_list()
# Marker colors supported by folium.Icon (kept for reference)
color_options = {
    "red", "darkred", "lightred", "orange", "beige",
    "green", "darkgreen", "lightgreen", "blue", "darkblue",
    "cadetblue", "lightblue", "purple", "darkpurple",
    "pink", "white", "gray", "lightgray", "black",
}
min_price = min(prices)
max_price = max(prices)
intermediate_price = np.median(prices)
num_categorias = 5
df.sort_values(by='Y house price of unit area', ascending=True, inplace=True)
df.reset_index(inplace=True, drop=True)
# Bucket the prices into five quantile-based categories of roughly equal size
etiquetas = ['Super Barata', 'Barata', 'Media', 'Cara', 'Super Cara']
colores = ['lightgreen', 'green', 'darkgreen', 'red', 'darkred']
pesos = df.index.values
# Alternative with fixed cut points instead of quantiles:
# intervalos = [min_price, intermediate_price, max_price, np.inf]
# categorias = pd.cut(prices, bins=intervalos, labels=etiquetas)
df = df.assign(categorias=pd.qcut(df['Y house price of unit area'], q=num_categorias, labels=etiquetas))
df = df.assign(colores_zone=pd.qcut(df['Y house price of unit area'], q=num_categorias, labels=colores))
# Heat-map weights: the rank of each row after sorting by price (1 = cheapest)
df['weights'] = (pesos + 1)
lat = df['X5 latitude'].to_list()
lon = df['X6 longitude'].to_list()
max_lat = max(lat) + 0.007
min_lat = min(lat) - 0.007
max_lon = max(lon) + 0.007
min_lon = min(lon) - 0.007
m = folium.Map(location=[24.96515, 121.53737], zoom_start=13,
               control_scale=True,
               scrollWheelZoom=False)
feature_gp1 = folium.FeatureGroup(name="icons")
feature_gp2 = folium.FeatureGroup(name="heat zone")
for i in df.index:
    latitud = df.loc[i, 'X5 latitude']
    longitud = df.loc[i, 'X6 longitude']
    peso = df.loc[i, 'weights']
    price = df.loc[i, 'Y house price of unit area']
    age = df.loc[i, 'X2 house age']
    color = df.loc[i, 'colores_zone']
    # Random hex color, usable via the optional icon_color argument below
    icon_color = "#" + ''.join(random.choice('ABCDEF0123456789') for _ in range(6))
    iconos = folium.Icon(
        color=color, prefix='fa', icon='fa-house',
        # icon_color=icon_color
    )
    marker = folium.Marker(
        location=[latitud, longitud],
        popup=(latitud, longitud),  # pop-up label showing the coordinates
        icon=iconos,
        tooltip=f'Antiquity: {age} years')
    marker.add_to(feature_gp1)
# Purple outline around the bounding box of all the points
ls = folium.PolyLine(
    locations=[[max_lat, max_lon],
               [max_lat, min_lon],
               [min_lat, min_lon],
               [min_lat, max_lon],
               [max_lat, max_lon]], color="purple"
)
# Build [lat, lon, weight] triples for the heat map
my_list = [[df.loc[i, 'X5 latitude'], df.loc[i, 'X6 longitude'], df.loc[i, 'weights']]
           for i in df.index]
feature_gp2.add_child(HeatMap(data=my_list, radius=26))
ls.add_child(folium.Popup("outline Popup on Polyline"))
m.add_child(ls)
feature_gp2.add_to(m)
feature_gp1.add_to(m)
folium.LayerControl().add_to(m)
folium.LatLngPopup().add_to(m)
m
Here we've added our data to a folium map, using the marker colors available in folium to represent each house's price in an intuitive way. A heat map layer has also been added.
import osmnx as ox
from geopy.geocoders import Photon
geolocator = Photon(user_agent="measurements")
latitud = str(df.loc[df['X2 house age'] == 13.9]['X5 latitude'].to_list()[0])
longitud = str(df.loc[df['X2 house age'] == 13.9]['X6 longitude'].to_list()[0])
location = geolocator.reverse(latitud + "," + longitud)
address = location.raw['properties']['city']
print(address)
ox.settings.log_console = True
ox.settings.use_cache = True
# define the place query
query = {'city': address}
# get the boundaries of the place
gdf = ox.geocode_to_gdf(query)
gdf.plot()
新北市
(plot of the city boundary)
Here, even though we already know the location of our data from its source, this code is useful when handling unknown coordinates: it tells us the city and generates a GeoDataFrame of its boundary to work with.
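If the response lacked a 'city' property, the lookup above would raise a KeyError. A minimal defensive sketch (the helper name and the fallback keys are our own assumptions about Photon's GeoJSON properties):
def city_from_coords(lat, lon, geolocator):
    loc = geolocator.reverse(f"{lat},{lon}")
    props = loc.raw.get('properties', {}) if loc is not None else {}
    # Fall back to coarser admin levels when 'city' is missing
    return props.get('city') or props.get('county') or props.get('state')
print(city_from_coords(latitud, longitud, geolocator))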
df.columns = ['transaction_date', 'age', 'distance_to_the_nearest_MRT_station', 'number_of_convenience_stores', 'lat',
              'lon', 'price', 'category', 'color', 'weights']
muy_baratas = df.groupby(by='category', observed=True).agg(
    {'number_of_convenience_stores': 'mean', 'distance_to_the_nearest_MRT_station': 'mean', 'price': 'mean',
     'age': 'mean'})
muy_baratas['category'] = muy_baratas.index.values
print(muy_baratas.to_string())
import plotly.express as px
fig = px.scatter(muy_baratas, x="age", y="price",
                 size="number_of_convenience_stores",
                 hover_name="category", log_x=True, size_max=100, color='category')
fig.show('png')
| category | number_of_convenience_stores | distance_to_the_nearest_MRT_station | price | age |
|---|---|---|---|---|
| Super Barata | 1.240964 | 2804.101812 | 19.802410 | 19.500000 |
| Barata | 2.879518 | 1354.403169 | 29.953012 | 17.818072 |
| Media | 4.771084 | 542.854812 | 38.563855 | 23.844578 |
| Cara | 5.548780 | 450.979636 | 44.628049 | 16.601220 |
| Super Cara | 6.048193 | 259.463642 | 57.033735 | 10.785542 |
Here we've grouped the data by category and calculated the mean of each parameter. We can see that the most expensive houses are the ones located in the city, and the cheapest are the ones in the periphery.
-Expensive houses are newer than cheaper houses
-Even though a house may be recently built, its price depends hugely on its location
-It also tells us that a house with a convenience store nearby is more likely to be expensive (a quick check of this claim is sketched below)
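A one-line sanity check of the convenience-store claim (our own addition, using the renamed columns): the correlation between store count and price should come out positive.
print(df['number_of_convenience_stores'].corr(df['price']))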
Let's make a more in-depth analysis and check the houses in the downtown, where most of the data is located.
import numpy as np
import math
import scipy.spatial.distance as distance
import geopy.distance
# Suppose we have a list of (latitude, longitude) coordinates:
# points = [(lat1, lon1), (lat2, lon2), ...]
points = list(zip(df['lat'], df['lon']))
# 1. Compute the pairwise distance matrix
def get_distance(point1, point2):
    # Haversine great-circle distance between two (lat, lon) points, in km
    R = 6371  # Earth's radius in kilometers
    lat1, lon1 = np.radians(point1)
    lat2, lon2 = np.radians(point2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c
all_points = np.array(points)
dist_matrix = distance.cdist(all_points, all_points, get_distance)
# The medoid: the observation whose summed distance to all the others is minimal
center_index = np.argmin(dist_matrix.sum(axis=1))
center_point = points[center_index]
radius = 1000  # meters, for the folium Circle below
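# Optional sanity check (ours): the spherical haversine result should sit
# within roughly half a percent of geopy's ellipsoidal geodesic distance.
# p, q = points[0], points[1]
# print(get_distance(p, q), geopy.distance.geodesic(p, q).km)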
def calcular_nuevas_coordenadas(latitud, longitud, distancia_vertical, distancia_horizontal):
    # Offset a coordinate by a distance in km along each axis:
    # ~111.32 km per degree of latitude, and ~100 km per degree of longitude
    # at this latitude (111.32 * cos(25 deg) is roughly 100)
    nueva_latitud = latitud + (distancia_vertical / 111.32)
    nueva_longitud = longitud + (distancia_horizontal / 100)
    return nueva_latitud, nueva_longitud
centro_circunferencia = (center_point[0], center_point[1])
# 1 km offsets from the center, kept for reference; the filtering below uses
# exact geodesic distances instead
lat_axis, lon_axis = calcular_nuevas_coordenadas(center_point[0], center_point[1],
                                                 distancia_vertical=1, distancia_horizontal=1)
lat_axis_ngtv, lon_axis_ngtv = calcular_nuevas_coordenadas(center_point[0], center_point[1],
                                                           distancia_vertical=-1, distancia_horizontal=-1)
x_limit = lat_axis - center_point[0]
y_limit = lon_axis - center_point[1]
selected_points = df.copy()
# Keep only the houses within 1 km (geodesic) of the central point
for i in selected_points.index.values:
    latitud = selected_points.loc[i, 'lat']
    longitud = selected_points.loc[i, 'lon']
    if geopy.distance.geodesic((latitud, longitud), center_point).kilometers > radius / 1000:
        selected_points.drop(axis=0, index=i, inplace=True)
m = folium.Map(location=[24.96515, 121.53737], zoom_start=13,
               control_scale=True,
               scrollWheelZoom=False)
for i in selected_points.index:
    latitud = selected_points.loc[i, 'lat']
    longitud = selected_points.loc[i, 'lon']
    peso = selected_points.loc[i, 'weights']
    age = selected_points.loc[i, 'age']
    color = selected_points.loc[i, 'color']
    iconos = folium.Icon(color=color, prefix='fa', icon='fa-house')
    marker = folium.Marker(
        location=[latitud, longitud],
        popup=peso,  # the price rank assigned earlier (1 = cheapest)
        icon=iconos,
        tooltip=f'Antiquity: {age} years')
    marker.add_to(m)
folium.Circle(
    location=[center_point[0], center_point[1]],
    radius=radius,  # meters
    fill_color="cornflowerblue",
    fill=False,
    popup="{} m".format(radius),
).add_to(m)
m
Here we've calculated the central point of our coordinates (i.e., the point whose summed distance to every other point is minimal) and restricted our data to a circle around it, to focus on the area with the highest density of houses.
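To make the idea concrete, here is a tiny sketch (toy coordinates of our own) showing that this central point, the medoid, is always one of the observations, unlike the coordinate-wise mean:
toy = np.array([(0.0, 0.0), (0.0, 2.0), (10.0, 0.0)])
centroid = toy.mean(axis=0)  # not a data point in general
d = distance.cdist(toy, toy, get_distance)
medoid = toy[np.argmin(d.sum(axis=1))]  # always an actual observation
print(centroid, medoid)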
selected_points_agg = selected_points.groupby(by='category', observed=True).agg(
    {'number_of_convenience_stores': 'mean', 'distance_to_the_nearest_MRT_station': 'mean', 'price': 'mean',
     'age': 'mean'})
print(selected_points_agg.to_string())
| category | number_of_convenience_stores | distance_to_the_nearest_MRT_station | price | age |
|---|---|---|---|---|
| Barata | 3.950000 | 446.603880 | 31.370000 | 26.170000 |
| Media | 5.070175 | 443.935544 | 38.726316 | 25.689474 |
| Cara | 6.070175 | 317.435891 | 44.433333 | 17.573684 |
| Super Cara | 5.983051 | 252.142486 | 58.230508 | 10.827119 |
We can see that if we reduce our data to a 1 km radius, with the same aggregation as before, the predominant parameters defining the price downtown are not the number of convenience stores nearby but the distance to the nearest metro station and the age of the house.
Let's look at a more specific insight.
casas_nuevas = selected_points.copy()
casas_nuevas.drop(columns=['transaction_date', 'category', 'color', 'weights'], axis=1, inplace=True)
print(casas_nuevas.corr().to_string())
| | age | distance_to_the_nearest_MRT_station | number_of_convenience_stores | lat | lon | price |
|---|---|---|---|---|---|---|
| age | 1.000000 | 0.273330 | 0.074861 | 0.138400 | -0.109839 | -0.326439 |
| distance_to_the_nearest_MRT_station | 0.273330 | 1.000000 | -0.260047 | 0.225971 | -0.108378 | -0.343903 |
| number_of_convenience_stores | 0.074861 | -0.260047 | 1.000000 | 0.056987 | 0.690952 | 0.167605 |
| lat | 0.138400 | 0.225971 | 0.056987 | 1.000000 | -0.078110 | 0.084663 |
| lon | -0.109839 | -0.108378 | 0.690952 | -0.078110 | 1.000000 | 0.204144 |
| price | -0.326439 | -0.343903 | 0.167605 | 0.084663 | 0.204144 | 1.000000 |
This is a super useful tool and one of the cores of statistics. The correlation tells us how strongly two variables are bound, and in which manner: a correlation of 1 means the variables have an increasing relation (when one increases, so does the other), -1 means the opposite, and 0 means their behaviours have no linear relation at all. (Strictly speaking, corr() computes the Pearson correlation, which is the covariance normalized by both standard deviations so that it always lies between -1 and 1.)
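As a minimal sketch of that definition (the helper below is our own, not part of the original analysis), we can recompute one entry of the matrix by hand:
def pearson(x, y):
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    cov = ((x - x.mean()) * (y - y.mean())).mean()  # covariance
    return cov / (x.std() * y.std())                # normalized to [-1, 1]
print(pearson(selected_points['age'], selected_points['price']))  # ~ -0.33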
We will see these relations in a more visual way, using a matrix where both axes correspond to the variables and every intersection shows a scatter plot of the relation between the two.
import seaborn as sns
import matplotlib.pyplot as plt
sns.pairplot(selected_points, diag_kind='kde')
plt.show()
On the diagonal of the matrix, where a variable intersects itself, we get the distribution of that variable, i.e., the range of values where most of its data is located.
Where the points are widely scattered, the relation between the two variables is weaker; where the data tends to grow or decrease along the Y axis as we move along the X axis, the correlation tends to 1 or -1.
sns.histplot(selected_points['age'], bins=20)
plt.xlabel('House age')
plt.ylabel('Frequency')
plt.title('Distribution of house ages')
plt.show()
Here we get a basic view of the distribution of house age, which matches how the points of the age variable spread along the X axis in each of the scatter plots above.
sns.scatterplot(x='age', y='price', data=selected_points)
plt.xlabel('Age')
plt.ylabel('House price')
plt.title('Age vs. price')
plt.show()
Finally, we will see if we can do some forecasting of prices as age goes on, and find which cases weakened our correlation. Let's sort our dataframe by age. (The correlation between age and price, about -0.33, is an important factor: it means that as age grows, the price is likely to go down.)
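As a minimal forecasting sketch (a plain linear fit of our own; the original analysis does not fit a model), we can estimate how much the price drops per year of age:
from sklearn.linear_model import LinearRegression
X_age = selected_points[['age']].to_numpy()
y_price = selected_points['price'].to_numpy()
model = LinearRegression().fit(X_age, y_price)
print(f"price ~ {model.intercept_:.2f} + {model.coef_[0]:.2f} * age")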
from sklearn.cluster import DBSCAN
# Cluster the (age, price) pairs; points outside the dense main cluster are
# candidate outliers
X = np.array([[df.loc[i, 'age'], df.loc[i, 'price']] for i in df.index])
db = DBSCAN(eps=9, min_samples=10).fit(X)
labels = db.labels_
# Number of clusters in labels, ignoring noise if present
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)
unique_labels = set(labels)
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
# Indices of every point outside the main cluster (label 0); with these
# parameters there is a single cluster, so these are the suspected outliers
indices_falsos = [i for i, label in enumerate(labels) if label != 0]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise
        col = [0, 0, 0, 1]
    class_member_mask = labels == k
    # Core points of the cluster: large markers
    xy_ = X[class_member_mask & core_samples_mask]
    plt.plot(
        xy_[:, 0],
        xy_[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
        markersize=14,
    )
    # Border points of the cluster and noise points: small markers
    xy_outliers = X[class_member_mask & ~core_samples_mask]
    print(xy_outliers)
    plt.plot(
        xy_outliers[:, 0],
        xy_outliers[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
        markersize=6,
    )
plt.title(f"Number of clusters: {n_clusters_}")
plt.show()
[[35.9 61.5]
 [ 0.  69.7]
 [ 0.  70.1]
 [ 0.  71. ]]
[[ 41.3  60.7]
 [ 38.6  62.9]
 [ 41.4  63.3]
 [ 40.9  67.7]
 [  0.   73.6]
 [ 35.4  78. ]
 [ 37.2  78.3]
 [ 10.8 117.5]]
With a little bit of tuning, we have spotted some possible outliers.
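A common heuristic for that tuning (our own addition, not part of the original analysis) is the k-distance elbow: sort every point's distance to its min_samples-th nearest neighbor and look for the knee, which suggests a reasonable eps.
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=10).fit(X)  # 10 matches min_samples above
dists, _ = nn.kneighbors(X)
plt.plot(np.sort(dists[:, -1]))
plt.ylabel('Distance to 10th nearest neighbor')
plt.show()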
outliers = df.loc[indices_falsos]
outliers
| | transaction_date | age | distance_to_the_nearest_MRT_station | number_of_convenience_stores | lat | lon | price | category | color | weights |
|---|---|---|---|---|---|---|---|---|---|---|
| 395 | 2013-01-01 | 41.3 | 124.9912 | 6 | 24.96674 | 121.54039 | 60.7 | Super Cara | darkred | 396 |
| 401 | 2013-01-01 | 38.6 | 804.6897 | 4 | 24.97838 | 121.53477 | 62.9 | Super Cara | darkred | 402 |
| 404 | 2013-01-01 | 41.4 | 281.2050 | 8 | 24.97345 | 121.54093 | 63.3 | Super Cara | darkred | 405 |
| 406 | 2013-01-01 | 40.9 | 122.3619 | 8 | 24.96756 | 121.54230 | 67.7 | Super Cara | darkred | 407 |
| 410 | 2013-05-01 | 0.0 | 292.9978 | 6 | 24.97744 | 121.54458 | 73.6 | Super Cara | darkred | 411 |
| 411 | 2013-06-01 | 35.4 | 318.5292 | 9 | 24.97071 | 121.54069 | 78.0 | Super Cara | darkred | 412 |
| 412 | 2013-03-01 | 37.2 | 186.5101 | 9 | 24.97703 | 121.54265 | 78.3 | Super Cara | darkred | 413 |
| 413 | 2013-03-01 | 10.8 | 252.5822 | 1 | 24.97460 | 121.53046 | 117.5 | Super Cara | darkred | 414 |
Here we have located the elements of our dataframe that may have skewed our correlation value. Let's drop these items and see whether we get a more balanced correlation that better measures the general nature of the data, leaving aside the special cases.
df.drop(axis=0, index=indices_falsos, inplace=True)
casas_nuevas.drop(axis=0, index=indices_falsos, inplace=True)
new_cov = df.copy()
new_cov.drop(columns=['transaction_date', 'category', 'color', 'weights'], axis=1, inplace=True)
print(new_cov.corr().to_string())
| | age | distance_to_the_nearest_MRT_station | number_of_convenience_stores | lat | lon | price |
|---|---|---|---|---|---|---|
| age | 1.000000 | 0.040930 | 0.021610 | 0.052362 | -0.060542 | -0.288220 |
| distance_to_the_nearest_MRT_station | 0.040930 | 1.000000 | -0.602641 | -0.591282 | -0.806179 | -0.708534 |
| number_of_convenience_stores | 0.021610 | -0.602641 | 1.000000 | 0.447611 | 0.445918 | 0.606093 |
| lat | 0.052362 | -0.591282 | 0.447611 | 1.000000 | 0.412476 | 0.582044 |
| lon | -0.060542 | -0.806179 | 0.445918 | 0.412476 | 1.000000 | 0.559409 |
| price | -0.288220 | -0.708534 | 0.606093 | 0.582044 | 0.559409 | 1.000000 |
print(casas_nuevas.corr().to_string())
| | age | distance_to_the_nearest_MRT_station | number_of_convenience_stores | lat | lon | price |
|---|---|---|---|---|---|---|
| age | 1.000000 | 0.303168 | 0.042480 | 0.167210 | -0.119470 | -0.526540 |
| distance_to_the_nearest_MRT_station | 0.303168 | 1.000000 | -0.253274 | 0.210262 | -0.095022 | -0.404114 |
| number_of_convenience_stores | 0.042480 | -0.253274 | 1.000000 | 0.070082 | 0.687840 | 0.223867 |
| lat | 0.167210 | 0.210262 | 0.070082 | 1.000000 | -0.074338 | 0.102738 |
| lon | -0.119470 | -0.095022 | 0.687840 | -0.074338 | 1.000000 | 0.336239 |
| price | -0.526540 | -0.404114 | 0.223867 | 0.102738 | 0.336239 | 1.000000 |
import plotly.graph_objects as go
muy_baratas.reset_index(drop=True, inplace=True)
muy_baratas['loc'] = muy_baratas.index.values + 1
muy_baratas.sort_values(by='price', axis=0, inplace=True, ascending=True)
print(muy_baratas)
dimensions = list([
    dict(range=(muy_baratas['loc'].max(), 1),
         tickvals=muy_baratas['loc'], ticktext=muy_baratas['category'],
         label='Category', values=muy_baratas['loc']),
    dict(range=(0, muy_baratas['price'].max()), label='Price', values=muy_baratas['price']),
    dict(range=(0, muy_baratas['number_of_convenience_stores'].max()),
         label='Convenience stores', values=muy_baratas['number_of_convenience_stores']),
    dict(range=(0, muy_baratas['distance_to_the_nearest_MRT_station'].max()),
         label='Distance to nearest MRT station',
         values=muy_baratas['distance_to_the_nearest_MRT_station']),
    dict(range=(0, muy_baratas['age'].max()), label='Age', values=muy_baratas['age']),
])
fig = go.Figure(data=go.Parcoords(line=dict(color=0, colorscale='agsunset'), dimensions=dimensions))
fig.update_layout(width=800, height=400, margin=dict(l=150, r=60, t=60, b=40))
fig.show("png")
| loc | category | number_of_convenience_stores | distance_to_the_nearest_MRT_station | price | age |
|---|---|---|---|---|---|
| 1 | Super Barata | 1.240964 | 2804.101812 | 19.802410 | 19.500000 |
| 2 | Barata | 2.879518 | 1354.403169 | 29.953012 | 17.818072 |
| 3 | Media | 4.771084 | 542.854812 | 38.563855 | 23.844578 |
| 4 | Cara | 5.548780 | 450.979636 | 44.628049 | 16.601220 |
| 5 | Super Cara | 6.048193 | 259.463642 | 57.033735 | 10.785542 |
Here we recalculated the correlations on the whole dataset and on the 1 km selection, dropping the outliers to get more precise information. The correlation between price and age downtown has gone from -0.33 to -0.53!
This Notebook exemplifies some common tools in data analysis and some core concepts of the statistics discipline.