# %% Explore Dist Plot import pandas as pd import json import glob import os import re import matplotlib.pyplot as plt def plot_edss_distribution_per_iteration(json_dir_path): # 1. Reuse your categorization logic def categorize_edss(value): if pd.isna(value): return 'Unknown' elif value <= 1.0: return '0-1' elif value <= 2.0: return '1-2' elif value <= 3.0: return '2-3' elif value <= 4.0: return '3-4' elif value <= 5.0: return '4-5' elif value <= 6.0: return '5-6' elif value <= 7.0: return '6-7' elif value <= 8.0: return '7-8' elif value <= 9.0: return '8-9' elif value <= 10.0: return '9-10' else: return '10+' # 2. Extract data from all files with Numerical Sorting all_records = [] json_files = glob.glob(os.path.join(json_dir_path, "*.json")) # Natural sort function to handle Iter 1, Iter 2 ... Iter 10 def natural_key(string_): return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] json_files.sort(key=natural_key) for i, file_path in enumerate(json_files): # We use the index + 1 for the label to ensure Iter 1 to Iter 10 order iter_label = f"Iter {i+1}" with open(file_path, 'r', encoding='utf-8') as f: try: data = json.load(f) for entry in data: if entry.get("success"): val = entry["result"].get("EDSS") all_records.append({ 'Iteration': iter_label, 'Category': categorize_edss(val), 'Order': i # Used to maintain sort order in the table }) except Exception as e: print(f"Error reading {file_path}: {e}") df = pd.DataFrame(all_records) # 3. Create a Frequency Table (Crosstab) # Pivot so iterations are on the X-axis dist_table = pd.crosstab(df['Iteration'], df['Category']) # Ensure the rows (Iterations) stay in the 1-10 order iter_order = [f"Iter {i+1}" for i in range(len(json_files))] dist_table = dist_table.reindex(iter_order) # Ensure columns follow clinical order fixed_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'] available_labels = [l for l in fixed_labels if l in dist_table.columns] dist_table = dist_table[available_labels] # 4. Plotting ax = dist_table.plot(kind='bar', stacked=True, figsize=(14, 8), colormap='viridis', edgecolor='white') plt.title('Distribution of Predicted EDSS Categories per Iteration', fontsize=15, pad=20) plt.xlabel('JSON Iteration File', fontsize=12) plt.ylabel('Number of Cases (Count)', fontsize=12) plt.xticks(rotation=0) # Move legend outside to the right plt.legend(title="EDSS Category", bbox_to_anchor=(1.05, 1), loc='upper left') # Add total count labels on top of bars for i, (name, row) in enumerate(dist_table.iterrows()): total = row.sum() if total > 0: plt.text(i, total + 2, f'Total: {int(total)}', ha='center', va='bottom', fontweight='bold') plt.tight_layout() plt.show() return dist_table # Usage: counts_table = plot_edss_distribution_per_iteration('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration') print(counts_table) ## # %% Explore Table import pandas as pd import json import glob import os import re def generate_edss_distribution_csv(json_dir_path, output_filename='edss_distribution_summary.csv'): # 1. Categorization logic def categorize_edss(value): if pd.isna(value): return 'Unknown' elif value <= 1.0: return '0-1' elif value <= 2.0: return '1-2' elif value <= 3.0: return '2-3' elif value <= 4.0: return '3-4' elif value <= 5.0: return '4-5' elif value <= 6.0: return '5-6' elif value <= 7.0: return '6-7' elif value <= 8.0: return '7-8' elif value <= 9.0: return '8-9' elif value <= 10.0: return '9-10' else: return '10+' # 2. Extract data from files with Natural Sorting all_records = [] json_files = glob.glob(os.path.join(json_dir_path, "*.json")) def natural_key(string_): return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] json_files.sort(key=natural_key) for i, file_path in enumerate(json_files): iter_label = f"Iter {i+1}" with open(file_path, 'r', encoding='utf-8') as f: try: data = json.load(f) for entry in data: if entry.get("success"): val = entry["result"].get("EDSS") all_records.append({ 'Iteration': iter_label, 'Category': categorize_edss(val) }) except Exception as e: print(f"Error reading {file_path}: {e}") df = pd.DataFrame(all_records) # 3. Create Frequency Table (Crosstab) dist_table = pd.crosstab(df['Iteration'], df['Category']) # 4. Reindex Rows (Numerical order) and Columns (Clinical order) iter_order = [f"Iter {i+1}" for i in range(len(json_files))] dist_table = dist_table.reindex(iter_order) fixed_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'] available_labels = [l for l in fixed_labels if l in dist_table.columns] dist_table = dist_table[available_labels] # Fill missing categories with 0 and convert to integers dist_table = dist_table.fillna(0).astype(int) # 5. Add "Total" row at the end # This sums the counts for each category across all iterations dist_table.loc['Total Sum'] = dist_table.sum() # 6. Save to CSV dist_table.to_csv(output_filename) print(f"Table successfully saved to: {output_filename}") return dist_table # Usage: final_table = generate_edss_distribution_csv('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration') print(final_table) ## # %% EDSS Confusion Matrix import pandas as pd import numpy as np import json import glob import os import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay def categorize_edss(value): if pd.isna(value): return np.nan elif value <= 1.0: return '0-1' elif value <= 2.0: return '1-2' elif value <= 3.0: return '2-3' elif value <= 4.0: return '3-4' elif value <= 5.0: return '4-5' elif value <= 6.0: return '5-6' elif value <= 7.0: return '6-7' elif value <= 8.0: return '7-8' elif value <= 9.0: return '8-9' elif value <= 10.0: return '9-10' else: return '10+' def plot_categorized_edss(json_dir_path, ground_truth_path): # 1. Load Ground Truth df_gt = pd.read_csv(ground_truth_path, sep=';') df_gt['unique_id'] = df_gt['unique_id'].astype(str) df_gt['MedDatum'] = df_gt['MedDatum'].astype(str) df_gt['EDSS'] = pd.to_numeric(df_gt['EDSS'], errors='coerce') # 2. Iterate through JSON files all_preds = [] json_pattern = os.path.join(json_dir_path, "*.json") for file_path in glob.glob(json_pattern): with open(file_path, 'r', encoding='utf-8') as f: try: data = json.load(f) for entry in data: if entry.get("success") and "result" in entry: res = entry["result"] all_preds.append({ 'unique_id': str(res.get('unique_id')), 'MedDatum': str(res.get('MedDatum')), 'edss_pred': res.get('EDSS') }) except Exception as e: print(f"Error reading {file_path}: {e}") df_pred = pd.DataFrame(all_preds) df_pred['edss_pred'] = pd.to_numeric(df_pred['edss_pred'], errors='coerce') # 3. Merge and Categorize # Clean keys to ensure 100% match rate for df in [df_gt, df_pred]: df['unique_id'] = df['unique_id'].astype(str).str.strip() df['MedDatum'] = df['MedDatum'].astype(str).str.strip() df_merged = pd.merge( df_gt[['unique_id', 'MedDatum', 'EDSS']], df_pred, on=['unique_id', 'MedDatum'], how='inner' ) df_merged = df_merged.dropna(subset=['EDSS', 'edss_pred']) # --- ADDED THESE LINES TO FIX THE NAMEERROR --- y_true = df_merged['EDSS'].apply(categorize_edss) y_pred = df_merged['edss_pred'].apply(categorize_edss) # ---------------------------------------------- print(f"Verification: Total matches in Confusion Matrix: {len(df_merged)}") # 4. Define fixed labels to handle data gaps fixed_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'] # 5. Generate Confusion Matrix cm = confusion_matrix(y_true, y_pred, labels=fixed_labels) # 6. Plotting fig, ax = plt.subplots(figsize=(10, 8)) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=fixed_labels) # Plotting (y_axis is Ground Truth, x_axis is LLM Prediction) disp.plot(cmap=plt.cm.Blues, values_format='d', ax=ax) plt.title('Categorized EDSS: Ground Truth vs LLM Prediction') plt.ylabel('Ground Truth EDSS') plt.xlabel('LLM Prediction') plt.show() ## # %% Confusion Matrix adjusted import pandas as pd import numpy as np import json import glob import os import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay def categorize_edss(value): """Bins EDSS values into clinical categories.""" if pd.isna(value): return np.nan elif value <= 1.0: return '0-1' elif value <= 2.0: return '1-2' elif value <= 3.0: return '2-3' elif value <= 4.0: return '3-4' elif value <= 5.0: return '4-5' elif value <= 6.0: return '5-6' elif value <= 7.0: return '6-7' elif value <= 8.0: return '7-8' elif value <= 9.0: return '8-9' elif value <= 10.0: return '9-10' else: return '10+' def plot_categorized_edss(json_dir_path, ground_truth_path): # 1. Load Ground Truth with Normalization df_gt = pd.read_csv(ground_truth_path, sep=';') # Standardize keys to ensure 1:N matching works df_gt['unique_id'] = df_gt['unique_id'].astype(str).str.strip().str.lower() df_gt['MedDatum'] = df_gt['MedDatum'].astype(str).str.strip().str.lower() df_gt['EDSS'] = pd.to_numeric(df_gt['EDSS'], errors='coerce') # 2. Load All Predictions from JSONs all_preds = [] json_files = glob.glob(os.path.join(json_dir_path, "*.json")) for file_path in json_files: with open(file_path, 'r', encoding='utf-8') as f: try: data = json.load(f) for entry in data: # We only take 'success': true entries if entry.get("success") and "result" in entry: res = entry["result"] all_preds.append({ 'unique_id': str(res.get('unique_id')).strip().lower(), 'MedDatum': str(res.get('MedDatum')).strip().lower(), 'edss_pred': res.get('EDSS') }) except Exception as e: print(f"Error reading {file_path}: {e}") df_pred = pd.DataFrame(all_preds) df_pred['edss_pred'] = pd.to_numeric(df_pred['edss_pred'], errors='coerce') # 3. Merge (This should give you ~3934 rows based on your audit) df_merged = pd.merge( df_gt[['unique_id', 'MedDatum', 'EDSS']], df_pred, on=['unique_id', 'MedDatum'], how='inner' ) # --- THE BIG REVEAL: Count the NaNs --- nan_in_gt = df_merged['EDSS'].isna().sum() nan_in_pred = df_merged['edss_pred'].isna().sum() print("-" * 40) print(f"TOTAL MERGED ROWS: {len(df_merged)}") print(f"Rows with missing Ground Truth EDSS: {nan_in_gt}") print(f"Rows with missing Prediction EDSS: {nan_in_pred}") print("-" * 40) # Now drop rows that have NO values in either side for the matrix df_final = df_merged.dropna(subset=['EDSS', 'edss_pred']).copy() print(f"FINAL ROWS FOR CONFUSION MATRIX: {len(df_final)}") print("-" * 40) # 4. Categorize for the Matrix y_true = df_final['EDSS'].apply(categorize_edss) y_pred = df_final['edss_pred'].apply(categorize_edss) fixed_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'] # 5. Generate and Print Raw Matrix cm = confusion_matrix(y_true, y_pred, labels=fixed_labels) # Print the Raw Matrix to terminal cm_df = pd.DataFrame(cm, index=[f"True_{l}" for l in fixed_labels], columns=[f"Pred_{l}" for l in fixed_labels]) print("\nRAW CONFUSION MATRIX (Rows=True, Cols=Pred):") print(cm_df) # 6. Plotting fig, ax = plt.subplots(figsize=(12, 10)) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=fixed_labels) # Values_format='d' ensures we see whole numbers, not scientific notation disp.plot(cmap=plt.cm.Blues, values_format='d', ax=ax) plt.title(f'EDSS Confusion Matrix\n(n={len(df_final)} iterations across ~400 cases)', fontsize=14) plt.ylabel('Ground Truth (Clinician)') plt.xlabel('LLM Prediction') plt.xticks(rotation=45) plt.tight_layout() plt.show() ## # %% Subcategories import pandas as pd import numpy as np import json import glob import os import matplotlib.pyplot as plt def plot_subcategory_analysis(json_dir_path, ground_truth_path): # 1. Column Mapping (JSON Key : CSV Column) mapping = { "VISUAL_OPTIC_FUNCTIONS": "Sehvermögen", "BRAINSTEM_FUNCTIONS": "Hirnstamm", "PYRAMIDAL_FUNCTIONS": "Pyramidalmotorik", "CEREBELLAR_FUNCTIONS": "Cerebellum", "SENSORY_FUNCTIONS": "Sensibiliät", "BOWEL_AND_BLADDER_FUNCTIONS": "Blasen-_und_Mastdarmfunktion", "CEREBRAL_FUNCTIONS": "Cerebrale_Funktion", "AMBULATION": "Ambulation" } # 2. Load Ground Truth df_gt = pd.read_csv(ground_truth_path, sep=';') df_gt['unique_id'] = df_gt['unique_id'].astype(str) df_gt['MedDatum'] = df_gt['MedDatum'].astype(str) # 3. Load Predictions including Subcategories all_preds = [] for file_path in glob.glob(os.path.join(json_dir_path, "*.json")): with open(file_path, 'r', encoding='utf-8') as f: data = json.load(f) for entry in data: if entry.get("success"): res = entry["result"] row = { 'unique_id': str(res.get('unique_id')), 'MedDatum': str(res.get('MedDatum')) } # Add subcategory scores for json_key in mapping.keys(): row[json_key] = res.get('subcategories', {}).get(json_key) all_preds.append(row) df_pred = pd.DataFrame(all_preds) # 4. Merge df_merged = pd.merge(df_gt, df_pred, on=['unique_id', 'MedDatum'], suffixes=('_gt', '_llm')) # 5. Calculate Metrics results = [] for json_key, csv_col in mapping.items(): # Ensure numeric true_vals = pd.to_numeric(df_merged[csv_col], errors='coerce') pred_vals = pd.to_numeric(df_merged[json_key], errors='coerce') # Drop NaNs for this specific subcategory mask = true_vals.notna() & pred_vals.notna() y_t = true_vals[mask] y_p = pred_vals[mask] if len(y_t) > 0: accuracy = (y_t == y_p).mean() * 100 mae = np.abs(y_t - y_p).mean() # Mean Absolute Error (Deviation) results.append({ 'Subcategory': csv_col, 'Accuracy': accuracy, 'Deviation': mae }) stats_df = pd.DataFrame(results).sort_values('Accuracy', ascending=False) # 6. Plotting fig, ax1 = plt.subplots(figsize=(14, 7)) # Bar chart for Accuracy bars = ax1.bar(stats_df['Subcategory'], stats_df['Accuracy'], color='#3498db', alpha=0.8, label='Accuracy (%)') ax1.set_ylabel('Accuracy (%)', color='#2980b9', fontsize=12, fontweight='bold') ax1.set_ylim(0, 115) # Extra head room for labels ax1.grid(axis='y', linestyle='--', alpha=0.7) # Rotate labels plt.xticks(rotation=30, ha='right', fontsize=10) # Line chart for Deviation ax2 = ax1.twinx() ax2.plot(stats_df['Subcategory'], stats_df['Deviation'], color='#e74c3c', marker='o', linewidth=2.5, markersize=8, label='Mean Abs. Deviation (Score Points)') ax2.set_ylabel('Mean Absolute Deviation', color='#c0392b', fontsize=12, fontweight='bold') # Adjust ax2 limit to avoid overlap with accuracy text ax2.set_ylim(0, max(stats_df['Deviation']) * 1.5 if not stats_df['Deviation'].empty else 1) # plt.title('Subcategory Performance: Accuracy vs. Mean Deviation', fontsize=14, pad=20) # --- THE FIX: Better Legend Placement --- # Combine legends from both axes and place them above the plot lines1, labels1 = ax1.get_legend_handles_labels() lines2, labels2 = ax2.get_legend_handles_labels() ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper center', bbox_to_anchor=(0.5, 1.12), ncol=2, frameon=False, fontsize=11) # Add percentage labels on top of bars for bar in bars: height = bar.get_height() ax1.annotate(f'{height:.1f}%', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 5), textcoords="offset points", ha='center', va='bottom', fontweight='bold', color='#2c3e50') plt.tight_layout() plt.show() ## # %% Certainty import pandas as pd import numpy as np import json import glob import os import matplotlib.pyplot as plt def categorize_edss(value): if pd.isna(value): return np.nan elif value <= 1.0: return '0-1' elif value <= 2.0: return '1-2' elif value <= 3.0: return '2-3' elif value <= 4.0: return '3-4' elif value <= 5.0: return '4-5' elif value <= 6.0: return '5-6' elif value <= 7.0: return '6-7' elif value <= 8.0: return '7-8' elif value <= 9.0: return '8-9' elif value <= 10.0: return '9-10' else: return '10+' def plot_certainty_vs_accuracy_by_category(json_dir_path, ground_truth_path): # 1. Data Loading & Merging df_gt = pd.read_csv(ground_truth_path, sep=';') df_gt['unique_id'] = df_gt['unique_id'].astype(str) df_gt['MedDatum'] = df_gt['MedDatum'].astype(str) df_gt['EDSS_true'] = pd.to_numeric(df_gt['EDSS'], errors='coerce') all_preds = [] for file_path in glob.glob(os.path.join(json_dir_path, "*.json")): with open(file_path, 'r', encoding='utf-8') as f: data = json.load(f) for entry in data: if entry.get("success"): res = entry["result"] all_preds.append({ 'unique_id': str(res.get('unique_id')), 'MedDatum': str(res.get('MedDatum')), 'EDSS_pred': res.get('EDSS'), 'certainty': res.get('certainty_percent') }) df_pred = pd.DataFrame(all_preds) df_pred['EDSS_pred'] = pd.to_numeric(df_pred['EDSS_pred'], errors='coerce') df = pd.merge(df_gt[['unique_id', 'MedDatum', 'EDSS_true']], df_pred, on=['unique_id', 'MedDatum']).dropna() # 2. Process Metrics df['gt_category'] = df['EDSS_true'].apply(categorize_edss) df['is_correct'] = (df['EDSS_true'].round(1) == df['EDSS_pred'].round(1)) fixed_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'] # Calculate Mean Certainty and Mean Accuracy per category stats = df.groupby('gt_category').agg({ 'is_correct': 'mean', 'certainty': 'mean', 'unique_id': 'count' }).reindex(fixed_labels) stats['accuracy_percent'] = stats['is_correct'] * 100 stats = stats.fillna(0) # 3. Plotting x = np.arange(len(fixed_labels)) width = 0.35 # Width of the bars fig, ax = plt.subplots(figsize=(14, 8)) # Plotting both bars side-by-side rects1 = ax.bar(x - width/2, stats['accuracy_percent'], width, label='Actual Accuracy (%)', color='#2ecc71', alpha=0.8) rects2 = ax.bar(x + width/2, stats['certainty'], width, label='LLM Avg. Certainty (%)', color='#e67e22', alpha=0.8) # Add text labels, titles and custom x-axis tick labels, etc. ax.set_ylabel('Percentage (%)', fontsize=12) ax.set_xlabel('Ground Truth EDSS Category', fontsize=12) # ax.set_title('Comparison: LLM Confidence (Certainty) vs. Real Accuracy per EDSS Range', fontsize=15, pad=25) ax.set_xticks(x) ax.set_xticklabels(fixed_labels) ax.set_ylim(0, 115) ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.08), ncol=2, frameon=False) ax.grid(axis='y', linestyle=':', alpha=0.5) # Helper function to label bar heights def autolabel(rects): for rect in rects: height = rect.get_height() if height > 0: ax.annotate(f'{height:.0f}%', xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9, fontweight='bold') autolabel(rects1) autolabel(rects2) # Add sample size (n) at the bottom for i, count in enumerate(stats['unique_id']): ax.text(i, 2, f'n={int(count)}', ha='center', va='bottom', fontsize=10, color='white', fontweight='bold') plt.tight_layout() plt.show() ## # %% Audit import pandas as pd import numpy as np import json import glob import os def audit_matches(json_dir_path, ground_truth_path): # 1. Load GT df_gt = pd.read_csv(ground_truth_path, sep=';') # 2. Advanced Normalization def clean_series(s): return s.astype(str).str.strip().str.lower() df_gt['unique_id'] = clean_series(df_gt['unique_id']) df_gt['MedDatum'] = clean_series(df_gt['MedDatum']) # 3. Load Predictions all_preds = [] for file_path in glob.glob(os.path.join(json_dir_path, "*.json")): with open(file_path, 'r', encoding='utf-8') as f: data = json.load(f) file_name = os.path.basename(file_path) for entry in data: if entry.get("success"): res = entry["result"] all_preds.append({ 'unique_id': str(res.get('unique_id')).strip().lower(), 'MedDatum': str(res.get('MedDatum')).strip().lower(), 'file': file_name }) df_pred = pd.DataFrame(all_preds) # 4. Find the "Ghost" entries (In JSON but not in GT) # Create a 'key' column for easy comparison df_gt['key'] = df_gt['unique_id'] + "_" + df_gt['MedDatum'] df_pred['key'] = df_pred['unique_id'] + "_" + df_pred['MedDatum'] gt_keys = set(df_gt['key']) df_pred['is_matched'] = df_pred['key'].isin(gt_keys) unmatched_summary = df_pred[df_pred['is_matched'] == False] print("--- AUDIT RESULTS ---") print(f"Total rows in JSON: {len(df_pred)}") print(f"Rows that matched GT: {df_pred['is_matched'].sum()}") print(f"Rows that FAILED to match: {len(unmatched_summary)}") if not unmatched_summary.empty: print("\nFirst 10 Unmatched Entries (check these against your CSV):") print(unmatched_summary[['unique_id', 'MedDatum', 'file']].head(10)) # Breakdown by file - see if specific JSON files are broken print("\nFailure count per JSON file:") print(unmatched_summary['file'].value_counts()) ## # %% Usage # --- Usage --- #plot_categorized_edss('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', # '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv') #plot_subcategory_analysis('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv') plot_certainty_vs_accuracy_by_category('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv') #audit_matches('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv') ##