Files
EDSS-calc/Data/styled_tables.py

72 lines
2.2 KiB
Python

import pandas as pd
import numpy as np
import seaborn as sns
# Load the tab-separated input table.
# NOTE: the path is machine-specific; replace it with the location of your own file.
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# Split the columns into the 'GT.' group and the 'result.' group.
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]

# Pair every GT column with the result column that shares its base name.
# GT columns without a matching result column are left out of the mapping.
_gt_prefix_len = len('GT.')
_result_lookup = set(result_columns)  # O(1) membership test inside the loop
column_mapping = {}
for gt_col in gt_columns:
    # Strip only the leading 'GT.': str.replace would also delete any later
    # 'GT.' inside the name and pair the column with the wrong result column.
    base_name = gt_col[_gt_prefix_len:]
    result_col = f'result.{base_name}'
    if result_col in _result_lookup:
        column_mapping[gt_col] = result_col
# Function to compute match percentage for each GT-Result pair
def compute_match_percentages(df, column_mapping):
    """Compute, per GT/result column pair, the share of rows that agree.

    A row counts as a match when both values are present, both convert with
    ``float()``, and they differ by at most 0.5. Rows with a missing or
    non-numeric value are never matches, but they still count towards the
    denominator, which is always ``len(df)``.

    Args:
        df: DataFrame holding every column named in ``column_mapping``.
        column_mapping: Mapping of ground-truth column name to the result
            column name it is compared against.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
        The columns are present even when ``column_mapping`` is empty, and
        the percentage is 0.0 when ``df`` has no rows.
    """
    output_columns = ['GT_Column', 'Result_Column', 'Match_Percentage']
    tolerance = 0.5
    total = len(df)

    percentages = []
    for gt_col, result_col in column_mapping.items():
        count = 0
        # Walk both columns in lockstep; this avoids df.iterrows(), which
        # builds a full Series object for every single row.
        for gt_val, result_val in zip(df[gt_col], df[result_col]):
            # Missing values can never match.
            if pd.isna(gt_val) or pd.isna(result_val):
                continue
            try:
                gt_float = float(gt_val)
                result_float = float(result_val)
            except (ValueError, TypeError):
                # Non-numeric values are skipped rather than treated as errors.
                continue
            if abs(gt_float - result_float) <= tolerance:
                count += 1

        # Guard against an empty frame: there is nothing to agree on, so
        # report 0.0 instead of raising ZeroDivisionError.
        percentage = (count / total) * 100 if total else 0.0
        percentages.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1),
        })

    # Passing the columns explicitly keeps the schema stable when there are
    # no pairs, so callers can still index by these column names.
    return pd.DataFrame(percentages, columns=output_columns)
# Agreement percentage for every GT/result column pair.
match_df = compute_match_percentages(df, column_mapping)

# Reshape into a GT-by-result grid: GT columns become the rows, result columns
# become the columns, and pairs that were never compared are filled with 0.
_indexed_matches = match_df.set_index(['GT_Column', 'Result_Column'])
pivot_table = _indexed_matches['Match_Percentage'].unstack(fill_value=0)

# Shade the cells with a light-to-green colormap; axis=None scales the
# gradient over the whole table rather than per row or per column.
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)

# Show the result. The bare expression on the last line renders the styled
# table in a notebook cell; it has no visible effect when run as a script.
print("Agreement Percentage Table (with gradient):")
styled_table