certainty Delta show
This commit is contained in:
@@ -1214,6 +1214,307 @@ def plot_error_distribution_by_confidence(json_dir_path, ground_truth_path):
|
|||||||
# plot_error_distribution_by_confidence('jsons_folder/', 'ground_truth.csv')
|
# plot_error_distribution_by_confidence('jsons_folder/', 'ground_truth.csv')
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% Certainty vs Delta
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
|
||||||
|
def plot_confidence_vs_abs_error_refined(json_dir_path, ground_truth_path):
    """Bar chart of mean absolute EDSS error per LLM-confidence bracket.

    Loads every ``*.json`` prediction file in *json_dir_path*, merges the
    successful predictions with the ground-truth CSV on
    (``unique_id``, ``MedDatum``), bins rows by ``certainty_percent`` and
    plots the mean absolute error per bin with SEM error bars and a linear
    trend line.

    Parameters
    ----------
    json_dir_path : str
        Directory containing prediction JSON files.  Each file holds a list
        of entries; only entries whose ``"success"`` key is truthy are used,
        and prediction fields are read from ``entry["result"]``.
    ground_truth_path : str
        Path to a ';'-separated CSV with columns ``unique_id``, ``MedDatum``
        and ``EDSS``.

    Returns
    -------
    None
        Displays the figure via ``plt.show()``.
    """
    # 1. Load ground truth; normalize join keys so the merge is
    #    whitespace/case-insensitive.
    df_gt = pd.read_csv(ground_truth_path, sep=';')
    df_gt['unique_id'] = df_gt['unique_id'].astype(str).str.strip().str.lower()
    df_gt['MedDatum'] = df_gt['MedDatum'].astype(str).str.strip().str.lower()
    df_gt['EDSS_gt'] = pd.to_numeric(df_gt['EDSS'], errors='coerce')

    # Collect successful predictions from every JSON file in the directory.
    all_preds = []
    for file_path in glob.glob(os.path.join(json_dir_path, "*.json")):
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for entry in data:
            if entry.get("success"):
                res = entry["result"]
                all_preds.append({
                    'unique_id': str(res.get('unique_id')).strip().lower(),
                    'MedDatum': str(res.get('MedDatum')).strip().lower(),
                    'EDSS_pred': pd.to_numeric(res.get('EDSS'), errors='coerce'),
                    'confidence': pd.to_numeric(res.get('certainty_percent'), errors='coerce'),
                })

    # Inner join + dropna keeps only rows with a prediction, a ground-truth
    # value and a parseable confidence.
    df_merged = pd.merge(pd.DataFrame(all_preds),
                         df_gt[['unique_id', 'MedDatum', 'EDSS_gt']],
                         on=['unique_id', 'MedDatum'], how='inner').dropna()

    # 2. Absolute prediction error in EDSS points.
    df_merged['abs_error'] = (df_merged['EDSS_pred'] - df_merged['EDSS_gt']).abs()

    # 3. Binning.  include_lowest=True so a confidence of exactly 0 is not
    #    silently dropped (pd.cut intervals are left-open by default).
    bins = [0, 70, 80, 90, 100]
    labels = ['Low (<70%)', 'Moderate (70-80%)', 'High (80-90%)', 'Very High (90-100%)']
    df_merged['conf_bin'] = pd.cut(df_merged['confidence'], bins=bins,
                                   labels=labels, include_lowest=True)

    stats = df_merged.groupby('conf_bin', observed=True)['abs_error'].agg(
        ['mean', 'std', 'count']).reset_index()

    # 4. Plotting
    plt.figure(figsize=(12, 8))
    # Sequential palette: light to dark.
    colors = sns.color_palette("Blues", n_colors=len(labels))

    bars = plt.bar(stats['conf_bin'], stats['mean'], color=colors,
                   edgecolor='black', linewidth=1.2)

    # Standard error of the mean per bin.
    plt.errorbar(stats['conf_bin'], stats['mean'],
                 yerr=stats['std'] / np.sqrt(stats['count']),
                 fmt='none', c='black', capsize=6, elinewidth=1.5)

    # Trend line (linear fit).
    # BUG FIX: x positions must match the number of *observed* bins —
    # groupby(observed=True) drops empty bins, so np.arange(len(labels))
    # would hand np.polyfit mismatched x/y lengths and raise.
    x_idx = np.arange(len(stats))
    if len(stats) >= 2:  # a line fit needs at least two points
        z = np.polyfit(x_idx, stats['mean'], 1)
        p = np.poly1d(z)
        plt.plot(x_idx, p(x_idx), color="#e74c3c", linestyle="--", linewidth=2.5)

    # 5. THE COMPLETE LEGEND
    # One explicit handle for every element drawn on the chart.
    legend_elements = [
        # Explicit color mapping for bins
        Patch(facecolor=colors[0], edgecolor='black', label=f'Bin 1: {labels[0]}'),
        Patch(facecolor=colors[1], edgecolor='black', label=f'Bin 2: {labels[1]}'),
        Patch(facecolor=colors[2], edgecolor='black', label=f'Bin 3: {labels[2]}'),
        Patch(facecolor=colors[3], edgecolor='black', label=f'Bin 4: {labels[3]}'),
        # Statistical components
        Line2D([0], [0], color='black', marker='_', linestyle='None',
               markersize=10, label='Standard Error (SEM)'),
        Line2D([0], [0], color='#e74c3c', linestyle='--', lw=2.5,
               label='Correlation Trend (Inverse Rel.)'),
        # Metric definition
        Patch(color='none', label='Metric: Mean Absolute Error (MAE)'),
    ]

    plt.legend(handles=legend_elements, loc='upper right', frameon=True,
               shadow=True, fontsize=10, title="Legend")

    # Final labels & clean-up
    plt.title('Validation: Inverse Correlation of Confidence vs. Error Magnitude',
              fontsize=15, pad=20)
    plt.ylabel('Mean Absolute Error (Δ EDSS Points)', fontsize=12)
    plt.xlabel('LLM Confidence Bracket', fontsize=12)

    # Annotate each bar with its MAE value.
    for i, bar in enumerate(bars):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05,
                 f'MAE: {stats.loc[i, "mean"]:.2f}', ha='center', fontweight='bold')

    plt.grid(axis='y', linestyle=':', alpha=0.5)
    plt.tight_layout()
    plt.show()
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% name
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
|
||||||
|
def plot_final_thesis_error_chart(json_dir_path, ground_truth_path):
    """Thesis-ready chart: mean absolute EDSS error per confidence bracket.

    Same data pipeline as the other confidence-vs-error plots (merge all
    JSON predictions in *json_dir_path* with the ground-truth CSV, bin by
    ``certainty_percent``), but with per-bar ``MAE``/``n`` labels and extra
    headroom for the annotations.

    Parameters
    ----------
    json_dir_path : str
        Directory containing ``*.json`` prediction files; only entries with
        a truthy ``"success"`` key are used (fields from ``entry["result"]``).
    ground_truth_path : str
        ';'-separated CSV with columns ``unique_id``, ``MedDatum``, ``EDSS``.

    Returns
    -------
    None
        Displays the figure via ``plt.show()``.
    """
    # 1. Load Ground Truth & Predictions; normalize join keys so the merge
    #    is whitespace/case-insensitive.
    df_gt = pd.read_csv(ground_truth_path, sep=';')
    df_gt['unique_id'] = df_gt['unique_id'].astype(str).str.strip().str.lower()
    df_gt['MedDatum'] = df_gt['MedDatum'].astype(str).str.strip().str.lower()
    df_gt['EDSS_gt'] = pd.to_numeric(df_gt['EDSS'], errors='coerce')

    all_preds = []
    for file_path in glob.glob(os.path.join(json_dir_path, "*.json")):
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for entry in data:
            if entry.get("success"):
                res = entry["result"]
                all_preds.append({
                    'unique_id': str(res.get('unique_id')).strip().lower(),
                    'MedDatum': str(res.get('MedDatum')).strip().lower(),
                    'EDSS_pred': pd.to_numeric(res.get('EDSS'), errors='coerce'),
                    'confidence': pd.to_numeric(res.get('certainty_percent'), errors='coerce'),
                })

    df_merged = pd.merge(pd.DataFrame(all_preds),
                         df_gt[['unique_id', 'MedDatum', 'EDSS_gt']],
                         on=['unique_id', 'MedDatum'], how='inner').dropna()

    # 2. Metric Calculation: absolute error in EDSS points.
    df_merged['abs_error'] = (df_merged['EDSS_pred'] - df_merged['EDSS_gt']).abs()

    # 3. Binning & Stats.  include_lowest=True keeps confidence == 0
    #    (pd.cut intervals are left-open by default).
    bins = [0, 70, 80, 90, 100]
    labels = ['Low (<70%)', 'Moderate (70-80%)', 'High (80-90%)', 'Very High (90-100%)']
    df_merged['conf_bin'] = pd.cut(df_merged['confidence'], bins=bins,
                                   labels=labels, include_lowest=True)

    stats = df_merged.groupby('conf_bin', observed=True)['abs_error'].agg(
        ['mean', 'std', 'count']).reset_index()

    # 4. Plotting
    plt.figure(figsize=(13, 8))
    colors = sns.color_palette("Blues", n_colors=len(labels))

    # BARS (MAE)
    bars = plt.bar(stats['conf_bin'], stats['mean'], color=colors,
                   edgecolor='black', alpha=0.85)

    # ERROR BARS (Standard Error of the Mean)
    plt.errorbar(stats['conf_bin'], stats['mean'],
                 yerr=stats['std'] / np.sqrt(stats['count']),
                 fmt='none', c='black', capsize=8, elinewidth=1.5)

    # CORRELATION TREND LINE
    # BUG FIX: x positions must match the number of *observed* bins —
    # groupby(observed=True) drops empty bins, so np.arange(len(labels))
    # would hand np.polyfit mismatched x/y lengths and raise.
    x_idx = np.arange(len(stats))
    if len(stats) >= 2:  # a line fit needs at least two points
        z = np.polyfit(x_idx, stats['mean'], 1)
        p = np.poly1d(z)
        plt.plot(x_idx, p(x_idx), color="#e74c3c", linestyle="--",
                 linewidth=3, zorder=5)

    # 5. DATA LABELS (n and MAE) above each bar.
    for i, bar in enumerate(bars):
        n_count = int(stats.loc[i, 'count'])
        mae_val = stats.loc[i, 'mean']
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.04,
                 f'MAE: {mae_val:.2f}\nn={n_count}',
                 ha='center', va='bottom', fontweight='bold', fontsize=10)

    # 6. THE COMPLETE LEGEND — one handle per chart element.
    legend_elements = [
        Patch(facecolor=colors[0], edgecolor='black', label=f'Bin 1: {labels[0]}'),
        Patch(facecolor=colors[1], edgecolor='black', label=f'Bin 2: {labels[1]}'),
        Patch(facecolor=colors[2], edgecolor='black', label=f'Bin 3: {labels[2]}'),
        Patch(facecolor=colors[3], edgecolor='black', label=f'Bin 4: {labels[3]}'),
        Line2D([0], [0], color='#e74c3c', linestyle='--', lw=3,
               label='Correlation Trend (Inverse Relationship)'),
        Line2D([0], [0], color='black', marker='_', linestyle='None',
               markersize=10, label='Standard Error (SEM)'),
        Patch(color='none', label='Metric: Mean Absolute Error (MAE)'),
    ]
    plt.legend(handles=legend_elements, loc='upper right', frameon=True,
               shadow=True, title="Chart Components")

    # Formatting
    plt.title('Clinical Validation: LLM Certainty vs. Prediction Accuracy',
              fontsize=16, pad=30)
    plt.ylabel('Mean Absolute Error (EDSS Points)', fontsize=12)
    plt.xlabel('LLM Confidence Bracket', fontsize=12)
    plt.grid(axis='y', linestyle=':', alpha=0.5)
    plt.ylim(0, stats['mean'].max() + 0.6)  # Add room for labels

    plt.tight_layout()
    plt.show()
|
||||||
|
|
||||||
|
# plot_final_thesis_error_chart('jsons_folder/', 'gt.csv')
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% 1json
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
|
||||||
|
def plot_single_json_error_analysis(json_file_path, ground_truth_path):
    """Confidence-vs-error chart for a single prediction JSON file.

    Same pipeline as the directory-wide variants, but reads exactly one
    JSON file (one iteration) instead of globbing a folder: merge its
    successful predictions with the ground-truth CSV on
    (``unique_id``, ``MedDatum``), bin by ``certainty_percent`` and plot
    per-bin mean absolute error with SEM bars, a trend line and per-bar
    ``MAE``/``n`` labels.

    Parameters
    ----------
    json_file_path : str
        Path to one prediction JSON file — a list of entries; only entries
        with a truthy ``"success"`` key are used (fields from
        ``entry["result"]``).
    ground_truth_path : str
        ';'-separated CSV with columns ``unique_id``, ``MedDatum``, ``EDSS``.

    Returns
    -------
    None
        Displays the figure via ``plt.show()``.
    """
    # 1. Load Ground Truth; normalize join keys so the merge is
    #    whitespace/case-insensitive.
    df_gt = pd.read_csv(ground_truth_path, sep=';')
    df_gt['unique_id'] = df_gt['unique_id'].astype(str).str.strip().str.lower()
    df_gt['MedDatum'] = df_gt['MedDatum'].astype(str).str.strip().str.lower()
    df_gt['EDSS_gt'] = pd.to_numeric(df_gt['EDSS'], errors='coerce')

    # 2. Load the Specific JSON
    all_preds = []
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for entry in data:
        if entry.get("success"):
            res = entry["result"]
            all_preds.append({
                'unique_id': str(res.get('unique_id')).strip().lower(),
                'MedDatum': str(res.get('MedDatum')).strip().lower(),
                'EDSS_pred': pd.to_numeric(res.get('EDSS'), errors='coerce'),
                'confidence': pd.to_numeric(res.get('certainty_percent'), errors='coerce'),
            })

    df_pred = pd.DataFrame(all_preds)

    # 3. Merge and Calculate Absolute Error
    df_merged = pd.merge(df_pred, df_gt[['unique_id', 'MedDatum', 'EDSS_gt']],
                         on=['unique_id', 'MedDatum'], how='inner').dropna()

    df_merged['abs_error'] = (df_merged['EDSS_pred'] - df_merged['EDSS_gt']).abs()

    # 4. Binning and Statistics.  include_lowest=True keeps confidence == 0
    #    (pd.cut intervals are left-open by default).
    bins = [0, 70, 80, 90, 100]
    labels = ['Low (<70%)', 'Moderate (70-80%)', 'High (80-90%)', 'Very High (90-100%)']
    df_merged['conf_bin'] = pd.cut(df_merged['confidence'], bins=bins,
                                   labels=labels, include_lowest=True)

    stats = df_merged.groupby('conf_bin', observed=True)['abs_error'].agg(
        ['mean', 'std', 'count']).reset_index()

    # 5. Plotting
    plt.figure(figsize=(13, 8))
    colors = sns.color_palette("Blues", n_colors=len(labels))

    # BARS (MAE)
    bars = plt.bar(stats['conf_bin'], stats['mean'], color=colors,
                   edgecolor='black', alpha=0.85)

    # ERROR BARS (SEM)
    plt.errorbar(stats['conf_bin'], stats['mean'],
                 yerr=stats['std'] / np.sqrt(stats['count']),
                 fmt='none', c='black', capsize=8, elinewidth=1.5)

    # CORRELATION TREND LINE
    # BUG FIX: x positions must match the number of *observed* bins —
    # groupby(observed=True) drops empty bins, so np.arange(len(labels))
    # would hand np.polyfit mismatched x/y lengths and raise.
    x_idx = np.arange(len(stats))
    if len(stats) >= 2:  # a line fit needs at least two points
        z = np.polyfit(x_idx, stats['mean'], 1)
        p = np.poly1d(z)
        plt.plot(x_idx, p(x_idx), color="#e74c3c", linestyle="--",
                 linewidth=3, zorder=5)

    # 6. DATA LABELS (n and MAE) above each bar.
    for i, bar in enumerate(bars):
        n_count = int(stats.loc[i, 'count'])
        mae_val = stats.loc[i, 'mean']
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.04,
                 f'MAE: {mae_val:.2f}\nn={n_count}',
                 ha='center', va='bottom', fontweight='bold', fontsize=10)

    # 7. COMPREHENSIVE LEGEND — one handle per chart element.
    legend_elements = [
        Patch(facecolor=colors[0], edgecolor='black', label=f'Bin 1: {labels[0]}'),
        Patch(facecolor=colors[1], edgecolor='black', label=f'Bin 2: {labels[1]}'),
        Patch(facecolor=colors[2], edgecolor='black', label=f'Bin 3: {labels[2]}'),
        Patch(facecolor=colors[3], edgecolor='black', label=f'Bin 4: {labels[3]}'),
        Line2D([0], [0], color='#e74c3c', linestyle='--', lw=3,
               label='Inverse Trend Line'),
        Line2D([0], [0], color='black', marker='_', linestyle='None',
               markersize=10, label='Std Error (SEM)'),
        Patch(color='none', label='Metric: Mean Absolute Error (MAE)'),
    ]
    plt.legend(handles=legend_elements, loc='upper right', frameon=True,
               shadow=True, title="Legend")

    # Final Styling
    plt.title('Validation: Confidence vs. Error Magnitude (Iteration 1 Only)',
              fontsize=15, pad=30)
    plt.ylabel('Mean Absolute Error (EDSS Points)', fontsize=12)
    plt.xlabel('LLM Confidence Bracket', fontsize=12)
    plt.grid(axis='y', linestyle=':', alpha=0.5)
    plt.ylim(0, stats['mean'].max() + 0.6)

    plt.tight_layout()
    plt.show()
|
||||||
|
|
||||||
|
# --- RUN THE PLOT ---
# Driver for the single-file analysis above. Paths are hard-coded to this
# machine's local data layout; adjust before running elsewhere.
# Iteration-1 prediction JSON (list of per-letter LLM results).
json_path = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique_results_iter_1_20260212_020628.json"
# Ground-truth EDSS scores (';'-separated CSV with unique_id/MedDatum/EDSS).
gt_path = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv"

plot_single_json_error_analysis(json_path, gt_path)
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %% Usage
|
# %% Usage
|
||||||
|
|
||||||
# --- Usage ---
|
# --- Usage ---
|
||||||
@@ -1231,5 +1532,9 @@ def plot_error_distribution_by_confidence(json_dir_path, ground_truth_path):
|
|||||||
#plot_edss_vs_confidence_boxplot('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration')
|
#plot_edss_vs_confidence_boxplot('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration')
|
||||||
#plot_gt_vs_llm_boxplot('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_gt_vs_llm_boxplot('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
#plot_categorical_vs_categorical('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_categorical_vs_categorical('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
plot_error_distribution_by_confidence('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_error_distribution_by_confidence('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
|
#plot_confidence_vs_abs_error_refined('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
|
#plot_confidence_vs_abs_error_with_counts('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
|
plot_final_thesis_error_chart('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|||||||
Reference in New Issue
Block a user