Config Dashboard

This commit is contained in:
2026-02-23 18:19:50 +01:00
parent 118e3e63b3
commit 816c50e467
2 changed files with 124 additions and 97 deletions

View File

@@ -2326,12 +2326,12 @@ def correlation_scatter_raw_certainty_json1_reference(
xs = np.linspace(np.nanmin(x), np.nanmax(x), 200) xs = np.linspace(np.nanmin(x), np.nanmax(x), 200)
ax.plot(xs, a * xs + b, linestyle="--", linewidth=2.5, color=trend_color) ax.plot(xs, a * xs + b, linestyle="--", linewidth=2.5, color=trend_color)
ax.set_xlabel("certainty_percent (from JSON 1, per key)") ax.set_xlabel("certainty percent")
ax.set_ylabel("Absolute Error |EDSS_pred EDSS_gt|" if y_mode == "abs" else "Signed Error (EDSS_pred EDSS_gt)") ax.set_ylabel("Absolute Error" if y_mode == "abs" else "Signed Error (EDSS_pred EDSS_gt)")
ax.set_title( # ax.set_title(
f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n" # f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n"
f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}" # f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}"
) # )
ax.grid(linestyle=":", alpha=0.5) ax.grid(linestyle=":", alpha=0.5)
# Colorbar # Colorbar
@@ -2362,7 +2362,7 @@ correlation_scatter_raw_certainty_json1_reference(
json1_file_path=json1_path, json1_file_path=json1_path,
ground_truth_path="/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv", ground_truth_path="/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv",
y_mode="abs", y_mode="abs",
save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg" # save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg"
) )
## ##

View File

@@ -718,128 +718,155 @@ plt.show()
## ##
# %% Dashboard # %% Dashboard
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import matplotlib.dates as mdates
from datetime import datetime
import numpy as np import numpy as np
from matplotlib.gridspec import GridSpec
def to_numeric_comma(s: pd.Series) -> pd.Series:
    """Parse a Series of number-like values that may use "," as decimal mark.

    Every value is first converted to ``str``, each "," is replaced by ".",
    and the result is parsed with ``pd.to_numeric``. Both ``1.5`` and ``1,5``
    therefore yield ``1.5``.

    Args:
        s: Series holding numbers or their textual representation.

    Returns:
        A numeric Series aligned with ``s``. Entries that cannot be parsed
        become NaN (``errors="coerce"``) instead of raising.

    Note:
        Because *all* commas are replaced, values that combine a thousands
        separator with a decimal mark (e.g. ``"1.234,5"``) turn into
        ``"1.234.5"`` and are coerced to NaN.
    """
    # Literal (non-regex) replacement: "," -> "." so both notations parse.
    normalized = s.astype(str).str.replace(",", ".", regex=False)
    return pd.to_numeric(normalized, errors="coerce")
# Load the data # Load the data
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv' file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t') df = pd.read_csv(file_path, sep='\t')
# Rename columns to remove 'result.' prefix and handle spaces # Rename columns to remove 'result.' prefix and replace spaces
column_mapping = {} column_mapping = {}
for col in df.columns: for col in df.columns:
if col.startswith('result.'): if col.startswith('result.'):
new_name = col.replace('result.', '') new_name = col.replace('result.', '').replace(' ', '_')
# Handle spaces in column names (replace with underscores if needed)
new_name = new_name.replace(' ', '_')
column_mapping[col] = new_name column_mapping[col] = new_name
df = df.rename(columns=column_mapping) df = df.rename(columns=column_mapping)
# Convert MedDatum to datetime # Parse MedDatum safely
df['MedDatum'] = pd.to_datetime(df['MedDatum']) df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
# Check what columns actually exist in the dataset # Patient
print("Available columns:") patient_id = '6389d658'
print(df.columns.tolist()) patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
print("\nFirst few rows:") if patient_data.empty:
print(df.head()) raise ValueError(f"No data found for patient: {patient_id}")
# Hardcode specific patient names # Functional systems + EDSS
patient_names = ['2bf8486d'] edss_col, edss_title = ('GT.EDSS', 'EDSS')
# Define the functional systems (columns to plot) - adjust based on actual column names functional_systems = [
functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual'] ('GT.VISUAL_OPTIC_FUNCTIONS', 'Visual / Optic'),
('GT.CEREBELLAR_FUNCTIONS', 'Cerebellar'),
('GT.BRAINSTEM_FUNCTIONS', 'Brainstem'),
('GT.SENSORY_FUNCTIONS', 'Sensory'),
('GT.PYRAMIDAL_FUNCTIONS', 'Pyramidal (Motor)'),
('GT.AMBULATION', 'Ambulation'),
('GT.CEREBRAL_FUNCTIONS', 'Cerebral'),
('GT.BOWEL_AND_BLADDER_FUNCTIONS', 'Bowel & Bladder'),
]
# Create subplots horizontally (2 columns, adjust rows as needed) # y-axis max rules
num_plots = len(functional_systems) ymax_by_col = {
num_cols = 2 'GT.PYRAMIDAL_FUNCTIONS': 6,
num_rows = (num_plots + num_cols - 1) // num_cols # Ceiling division 'GT.SENSORY_FUNCTIONS': 6,
'GT.BOWEL_AND_BLADDER_FUNCTIONS': 6,
'GT.VISUAL_OPTIC_FUNCTIONS': 6,
'GT.CEREBELLAR_FUNCTIONS': 5,
'GT.CEREBRAL_FUNCTIONS': 5,
'GT.BRAINSTEM_FUNCTIONS': 5,
'GT.EDSS': 10,
}
default_ymax = 6
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=False) # Changed sharex=False # ---------- Build shared "event dates" ticks ----------
if num_plots == 1: cols_for_dates = [edss_col] + [c for c, _ in functional_systems]
axes = [axes] event_dates = []
elif num_rows == 1:
axes = axes for c in cols_for_dates:
if c in patient_data.columns:
y = to_numeric_comma(patient_data[c]) # <-- changed
x = patient_data['MedDatum']
tmp = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"])
event_dates.extend(tmp["x"].tolist())
event_dates = sorted(pd.Series(event_dates).drop_duplicates().tolist())
max_ticks = 8
if len(event_dates) > max_ticks:
idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int)
event_dates = [event_dates[i] for i in idx]
# ---------- A4 figure ----------
fig = plt.figure(figsize=(11.69, 8.27))
gs = GridSpec(nrows=3, ncols=4, figure=fig, height_ratios=[2.0, 1.0, 1.0], hspace=0.5, wspace=0.35)
def style_time_axis(ax, show_labels=True, *, dates=None, date_format='%Y-%m'):
    """Apply the dashboard's shared date-tick styling to the x-axis of ``ax``.

    Args:
        ax: Matplotlib Axes whose x-axis is styled.
        show_labels: When False, the bottom tick labels are hidden (the tick
            positions themselves are still set).
        dates: Tick positions to place on the x-axis. When None, the
            module-level ``event_dates`` list is used, which is the original
            behaviour of this helper.
        date_format: ``strftime`` pattern handed to ``mdates.DateFormatter``
            for the tick labels. Defaults to year-month.
    """
    # Fall back to the script-wide tick list so existing calls keep working.
    tick_dates = event_dates if dates is None else dates
    ax.set_xticks(tick_dates)
    ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format))
    ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2)
    if not show_labels:
        ax.tick_params(labelbottom=False)
# ---------- EDSS main plot ----------
ax_main = fig.add_subplot(gs[0, :])
if edss_col in patient_data.columns:
y = to_numeric_comma(patient_data[edss_col]) # <-- changed
x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
ax_main.set_title(edss_title, fontsize=14, fontweight='bold')
ax_main.set_ylabel("Score")
ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax))
ax_main.grid(True, alpha=0.3)
if not plot_df.empty:
ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red')
else:
ax_main.set_title("EDSS (no numeric data)", fontsize=14, fontweight='bold')
else: else:
axes = axes.flatten() ax_main.set_title("EDSS (missing column GT.EDSS)", fontsize=14, fontweight='bold')
ax_main.set_ylim(0, ymax_by_col.get(edss_col, 10))
ax_main.grid(True, alpha=0.3)
# Plot for the hardcoded patient style_time_axis(ax_main)
for i, system in enumerate(functional_systems):
# Filter data for this specific patient
patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum')
# Check if patient data exists # ---------- Small aligned plots ----------
if patient_data.empty: small_axes = []
print(f"No data found for patient: {patient_names[0]}") for k, (col, title) in enumerate(functional_systems):
continue r = 1 + (k // 4)
c = (k % 4)
ax = fig.add_subplot(gs[r, c], sharex=ax_main)
small_axes.append(ax)
# Check if the system column exists in the data ymax = ymax_by_col.get(col, default_ymax)
if system in patient_data.columns: ax.set_title(title, fontsize=10)
# Plot the specific functional system ax.set_ylabel("Score")
if not patient_data[system].isna().all(): ax.set_ylim(0, ymax)
axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system) ax.grid(True, alpha=0.3)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system}') if col in patient_data.columns:
axes[i].grid(True, alpha=0.3) y = to_numeric_comma(patient_data[col]) # <-- changed
axes[i].legend() x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
if not plot_df.empty:
ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue')
else: else:
axes[i].set_title(f'Functional System: {system} (No data)') ax.set_title(f"{title} (no data)", fontsize=10)
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
else: else:
# Try to find column with similar name (case insensitive) ax.set_title(f"{title} (missing)", fontsize=10)
found_column = None
for col in df.columns:
if system.lower() in col.lower():
found_column = col
break
if found_column: style_time_axis(ax)
print(f"Found similar column: {found_column}")
if not patient_data[found_column].isna().all():
axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system} (found as: {found_column})')
axes[i].grid(True, alpha=0.3)
axes[i].legend()
else:
axes[i].set_title(f'Functional System: {system} (Column not found)')
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
# Hide empty subplots # Hide x tick labels on first row of small plots
for i in range(len(functional_systems), len(axes)): for ax in small_axes[:4]:
axes[i].set_visible(False) ax.tick_params(labelbottom=False)
# Set x-axis label for the last row only
for i in range(len(functional_systems)):
if i >= len(axes) - num_cols: # Last row
axes[i].set_xlabel('Date')
# Force date formatting on all axes
for ax in axes:
ax.tick_params(axis='x', rotation=45)
ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
# Automatically format x-axis dates
plt.gcf().autofmt_xdate()
plt.tight_layout() plt.tight_layout()
fig.subplots_adjust(hspace=0.7)
plt.show() plt.show()
## ##
# %% Table # %% Table
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt