adjusting the script with new paths
This commit is contained in:
@@ -1258,7 +1258,7 @@
|
||||
|
||||
|
||||
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
|
||||
#
|
||||
|
||||
#import time
|
||||
#import json
|
||||
#import os
|
||||
@@ -1279,9 +1279,9 @@
|
||||
# print("Install with: pip install psutil")
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## CONFIGURATION
|
||||
## =========================
|
||||
# =========================
|
||||
# CONFIGURATION
|
||||
# =========================
|
||||
#
|
||||
#load_dotenv()
|
||||
#
|
||||
@@ -1289,21 +1289,21 @@
|
||||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||||
#
|
||||
#MODEL_CONFIGS = [
|
||||
## {
|
||||
## "model_name": "qwen3.6-35b-a3b",
|
||||
## "use_response_format": False,
|
||||
## "temperature": 0.0,
|
||||
## "max_tokens": 4096,
|
||||
##
|
||||
## # If your backend is vLLM / Qwen chat-template compatible,
|
||||
## # this may reduce long hidden reasoning and JSON truncation.
|
||||
## # If your server errors because of extra_body, set this to None.
|
||||
## "extra_body": {
|
||||
## "chat_template_kwargs": {
|
||||
## "enable_thinking": False
|
||||
## }
|
||||
## },
|
||||
## },
|
||||
# {
|
||||
# "model_name": "qwen3.6-35b-a3b",
|
||||
# "use_response_format": False,
|
||||
# "temperature": 0.0,
|
||||
# "max_tokens": 4096,
|
||||
#
|
||||
# # If your backend is vLLM / Qwen chat-template compatible,
|
||||
# # this may reduce long hidden reasoning and JSON truncation.
|
||||
# # If your server errors because of extra_body, set this to None.
|
||||
# "extra_body": {
|
||||
# "chat_template_kwargs": {
|
||||
# "enable_thinking": False
|
||||
# }
|
||||
# },
|
||||
# },
|
||||
# {
|
||||
# "model_name": "gemma-4-31B-it",
|
||||
# "use_response_format": False,
|
||||
@@ -1328,10 +1328,10 @@
|
||||
#NUM_ITERATIONS = 10
|
||||
#STOP_ON_FIRST_ERROR = False
|
||||
#
|
||||
## For testing, set to e.g. 2.
|
||||
## For full run, set to None.
|
||||
# For testing, set to e.g. 2.
|
||||
# For full run, set to None.
|
||||
#MAX_ROWS = 2
|
||||
## MAX_ROWS = 2
|
||||
# MAX_ROWS = 2
|
||||
#
|
||||
#MAX_TOKENS = 4096
|
||||
#TEMPERATURE = 0.0
|
||||
@@ -1340,14 +1340,14 @@
|
||||
#
|
||||
#SAVE_EVERY_N_ROWS = 1
|
||||
#
|
||||
## Retries for invalid JSON / truncated JSON
|
||||
# Retries for invalid JSON / truncated JSON
|
||||
#MAX_JSON_RETRIES = 2
|
||||
#RETRY_SLEEP_SEC = 2
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## CLIENT
|
||||
## =========================
|
||||
# =========================
|
||||
# CLIENT
|
||||
# =========================
|
||||
#
|
||||
#client = OpenAI(
|
||||
# api_key=OPENAI_API_KEY,
|
||||
@@ -1355,9 +1355,9 @@
|
||||
#)
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## HELPERS
|
||||
## =========================
|
||||
# =========================
|
||||
# HELPERS
|
||||
# =========================
|
||||
#
|
||||
#def safe_dir_name(name: str) -> str:
|
||||
# name = str(name).strip()
|
||||
@@ -1433,9 +1433,9 @@
|
||||
# return max(self.samples_mb)
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## JSON EXTRACTION
|
||||
## =========================
|
||||
# =========================
|
||||
# JSON EXTRACTION
|
||||
# =========================
|
||||
#
|
||||
#def extract_json_from_text(text):
|
||||
# if text is None:
|
||||
@@ -1587,17 +1587,17 @@
|
||||
# return None
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## READ INSTRUCTIONS
|
||||
## =========================
|
||||
# =========================
|
||||
# READ INSTRUCTIONS
|
||||
# =========================
|
||||
#
|
||||
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## PROMPT
|
||||
## =========================
|
||||
# =========================
|
||||
# PROMPT
|
||||
# =========================
|
||||
#
|
||||
#def build_prompt(patient_text):
|
||||
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
||||
@@ -1676,9 +1676,9 @@
|
||||
#'''
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## VALIDATION / NORMALIZATION
|
||||
## =========================
|
||||
# =========================
|
||||
# VALIDATION / NORMALIZATION
|
||||
# =========================
|
||||
#
|
||||
#def normalize_model_output(parsed):
|
||||
# if not isinstance(parsed, dict):
|
||||
@@ -1755,9 +1755,9 @@
|
||||
# return parsed
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## API CALL
|
||||
## =========================
|
||||
# =========================
|
||||
# API CALL
|
||||
# =========================
|
||||
#
|
||||
#def make_chat_completion(model_config, prompt):
|
||||
# model_name = model_config["model_name"]
|
||||
@@ -1794,9 +1794,9 @@
|
||||
# return client.chat.completions.create(**kwargs)
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## INFERENCE FUNCTION WITH RETRIES
|
||||
## =========================
|
||||
# =========================
|
||||
# INFERENCE FUNCTION WITH RETRIES
|
||||
# =========================
|
||||
#
|
||||
#def run_inference(patient_text, model_config):
|
||||
# model_name = model_config["model_name"]
|
||||
@@ -1912,9 +1912,9 @@
|
||||
# }
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## BUILD PATIENT TEXT
|
||||
## =========================
|
||||
# =========================
|
||||
# BUILD PATIENT TEXT
|
||||
# =========================
|
||||
#
|
||||
#def build_patient_text(row):
|
||||
# return (
|
||||
@@ -1925,9 +1925,9 @@
|
||||
# )
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## FLATTEN RESULTS FOR CSV
|
||||
## =========================
|
||||
# =========================
|
||||
# FLATTEN RESULTS FOR CSV
|
||||
# =========================
|
||||
#
|
||||
#def flatten_result(record):
|
||||
# flat = {
|
||||
@@ -2012,9 +2012,9 @@
|
||||
# return pd.DataFrame([summary])
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## INCREMENTAL SAVE HELPERS
|
||||
## =========================
|
||||
# =========================
|
||||
# INCREMENTAL SAVE HELPERS
|
||||
# =========================
|
||||
#
|
||||
#def append_jsonl(path, record):
|
||||
# with open(path, "a", encoding="utf-8") as f:
|
||||
@@ -2030,9 +2030,9 @@
|
||||
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
|
||||
#
|
||||
#
|
||||
## =========================
|
||||
## MAIN LOOP
|
||||
## =========================
|
||||
# =========================
|
||||
# MAIN LOOP
|
||||
# =========================
|
||||
#
|
||||
#if __name__ == "__main__":
|
||||
#
|
||||
@@ -2277,26 +2277,26 @@ MODEL_CONFIGS = [
|
||||
"max_tokens": 4096,
|
||||
"extra_body": None,
|
||||
},
|
||||
# {
|
||||
# "model_name": "GPT-OSS-120B",
|
||||
# "use_response_format": True,
|
||||
# "temperature": 0.0,
|
||||
# "max_tokens": 4096,
|
||||
# "extra_body": None,
|
||||
# },
|
||||
{
|
||||
"model_name": "GPT-OSS-120B",
|
||||
"use_response_format": True,
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 4096,
|
||||
"extra_body": None,
|
||||
},
|
||||
]
|
||||
|
||||
INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||||
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||||
INPUT_CSV ="/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||||
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"
|
||||
|
||||
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
|
||||
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"
|
||||
|
||||
NUM_ITERATIONS = 10
|
||||
NUM_ITERATIONS = 2
|
||||
STOP_ON_FIRST_ERROR = False
|
||||
|
||||
# For testing, set to e.g. 2.
|
||||
# For full run, set to None.
|
||||
MAX_ROWS = None
|
||||
MAX_ROWS = 2
|
||||
# MAX_ROWS = 2
|
||||
|
||||
MAX_TOKENS = 4096
|
||||
|
||||
Reference in New Issue
Block a user