"""Agreement table between ground-truth and result columns.

Reads a tab-separated file, pairs every ``GT.<name>`` column with the
``result.<name>`` column of the same name, and reports for each pair the
percentage of rows whose two values differ by at most a tolerance.
"""

import numpy as np
import pandas as pd
import seaborn as sns

GT_PREFIX = 'GT.'
RESULT_PREFIX = 'result.'


def _is_match(gt_val, result_val, tolerance):
    """Return True when both values are numeric and within *tolerance*.

    Missing values and values that ``float()`` cannot convert never match.
    """
    if pd.isna(gt_val) or pd.isna(result_val):
        return False
    try:
        difference = float(gt_val) - float(result_val)
    except (ValueError, TypeError):
        # Non-numeric cell: treated as "no match" rather than as an error.
        return False
    return abs(difference) <= tolerance


def compute_match_percentages(df, column_mapping, *, tolerance=0.5):
    """Compute the match percentage for each GT/result column pair.

    Args:
        df: DataFrame holding every column named in ``column_mapping``.
        column_mapping: Dict mapping a ground-truth column name to the
            result column it is compared against.
        tolerance: Largest absolute difference that still counts as a
            match. Defaults to 0.5.

    Returns:
        A DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
        The columns are present even when ``column_mapping`` is empty.

    Note:
        The denominator is the total number of rows in ``df``. Rows that
        are skipped because a value is missing or non-numeric therefore
        lower the percentage instead of being excluded from it. An empty
        ``df`` yields 0.0 for every pair.
    """
    total = len(df)
    records = []
    for gt_col, result_col in column_mapping.items():
        # Walk the two columns in lockstep; this avoids building a Series
        # per row as DataFrame.iterrows() does.
        count = sum(
            _is_match(gt_val, result_val, tolerance)
            for gt_val, result_val in zip(df[gt_col], df[result_col])
        )
        # Guard against division by zero on an empty frame.
        percentage = (count / total) * 100 if total else 0.0
        records.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1),
        })
    return pd.DataFrame(
        records,
        columns=['GT_Column', 'Result_Column', 'Match_Percentage'],
    )


# Sample data (replace with your actual df)
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# Identify GT and Result columns
gt_columns = [col for col in df.columns if col.startswith(GT_PREFIX)]
result_columns = [col for col in df.columns if col.startswith(RESULT_PREFIX)]

# Create mapping from each GT column to the result column with the same
# base name. Only the leading prefix is stripped, so a base name that itself
# contains 'GT.' is left intact.
column_mapping = {}
for gt_col in gt_columns:
    base_name = gt_col[len(GT_PREFIX):]
    result_col = f'{RESULT_PREFIX}{base_name}'
    if result_col in result_columns:
        column_mapping[gt_col] = result_col

# Compute match percentages
match_df = compute_match_percentages(df, column_mapping)

# Create a pivot table for gradient display (optional but helpful).
# NOTE: each GT column is mapped to exactly one result column, so only those
# cells hold computed values; every other cell is filled with 0 by
# ``fill_value`` and was never actually measured.
pivot_table = (
    match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage']
    .unstack(fill_value=0)
)

# Apply gradient background (one colour scale across the whole table)
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)

# Display result
print("Agreement Percentage Table (with gradient):")
# A bare expression is rendered only as the last statement of a notebook
# cell; when run as a plain script this line has no visible effect.
styled_table