# File-viewer header residue: 72 lines, 2.2 KiB, Python
import pandas as pd
|
|
import numpy as np
|
|
import seaborn as sns
|
|
|
|
# Load the joined, tab-separated table holding the paired GT.* / result.* columns.
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# Identify GT and Result columns by their name prefixes.
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]

# Map each GT column to the result column that shares its base name.
# Only the leading 'GT.' is stripped (slicing instead of str.replace), so a
# base name that itself contains 'GT.' is kept intact. GT columns without a
# matching result column are left out of the mapping.
_result_lookup = set(result_columns)
column_mapping = {}
for gt_col in gt_columns:
    base_name = gt_col[len('GT.'):]
    result_col = f'result.{base_name}'
    if result_col in _result_lookup:
        column_mapping[gt_col] = result_col
def compute_match_percentages(df, column_mapping, tolerance=0.5):
    """Compute the agreement percentage for each GT/result column pair.

    A row counts as a match when both cells are present, both convert with
    ``float()``, and their absolute difference is at most *tolerance*. Rows
    with a missing or non-numeric value in either column never match, but
    they still count toward the denominator, which is the total number of
    rows in *df*.

    Args:
        df: DataFrame containing every column named in *column_mapping*.
        column_mapping: Mapping of GT column name -> result column name.
        tolerance: Largest absolute difference still treated as agreement.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
        The percentage is 0.0 for every pair when *df* has no rows, and the
        three columns are present even when *column_mapping* is empty.
    """
    output_columns = ['GT_Column', 'Result_Column', 'Match_Percentage']
    total = len(df)
    records = []

    for gt_col, result_col in column_mapping.items():
        matches = 0

        # Walk the two columns in lockstep instead of df.iterrows(), which
        # builds a full Series for every row.
        for gt_val, result_val in zip(df[gt_col], df[result_col]):
            # Missing values can never agree.
            if pd.isna(gt_val) or pd.isna(result_val):
                continue

            # Non-numeric cells are skipped rather than treated as errors.
            try:
                difference = abs(float(gt_val) - float(result_val))
            except (ValueError, TypeError):
                continue

            if difference <= tolerance:
                matches += 1

        # Guard the division: an empty frame has no rows to agree on.
        percentage = (matches / total) * 100 if total else 0.0
        records.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1),
        })

    # Passing the column list keeps the schema stable when there are no records.
    return pd.DataFrame(records, columns=output_columns)
# Compute match percentages for every mapped GT/result pair.
match_df = compute_match_percentages(df, column_mapping)

# Reshape into a GT_Column x Result_Column grid for gradient display.
# match_df has one row per mapped pair, so only those cells carry a
# percentage; every other cell of the grid is filled with 0.
pivot_table = (
    match_df
    .set_index(['GT_Column', 'Result_Column'])['Match_Percentage']
    .unstack(fill_value=0)
)

# Apply a green gradient; axis=None colours the whole table on one shared scale.
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)

# Display result
print("Agreement Percentage Table (with gradient):")
# Bare expression: shown only where the environment echoes the last value
# (e.g. a notebook cell); it has no visible effect in a plain script run.
styled_table