Compare commits
13 Commits
fd989d0437
...
Voxtral
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a0d398a91 | |||
| 9ef1815b84 | |||
| 28ce0c7875 | |||
| 67769e10ae | |||
| f2b30c88e2 | |||
| 8c640c1cf6 | |||
| f65b12fc12 | |||
| 25c5be587f | |||
|
|
93aaa72e3c | ||
|
|
fde4dd57fe | ||
|
|
7e5e8abc63 | ||
|
|
3fc4f1a55c | ||
|
|
56adbaf443 |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -49,7 +49,7 @@ Thumbs.db
|
|||||||
# ======================
|
# ======================
|
||||||
# Experimental scripts
|
# Experimental scripts
|
||||||
# ======================
|
# ======================
|
||||||
2moodle.py
|
#2moodle.py
|
||||||
app.old_inside_pipeline.py
|
#app.old_inside_pipeline.py
|
||||||
feedback_bewertung.py
|
#feedback_bewertung.py
|
||||||
|
|
||||||
|
|||||||
22
README.md
22
README.md
@@ -1,6 +1,6 @@
|
|||||||
# 🩺 OSCE Feedback Evaluator
|
# 🩺 OSCE Feedback Evaluator
|
||||||
|
|
||||||
This tool automates the feedback process for clinical exams. Just point it at a directory of files and let it do the heavy lifting.
|
This tool automates the feedback process for OSCE clinical exams. Just point it at a directory of files and let it do the heavy lifting.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -19,18 +19,20 @@ python3 -m venv venv
|
|||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
# (Windows users: .\venv\Scripts\activate)
|
# (Windows users: .\venv\Scripts\activate)
|
||||||
|
|
||||||
|
```
|
||||||
### 2. Get the Code
|
### 2. Get the Code
|
||||||
|
|
||||||
Pull the latest changes from the repository:
|
Pull the latest changes from the repository:
|
||||||
git pull https://github.com/Shahin-rmz/OSCE-Feedback-Evaluator.git
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git pull https://github.com/Shahin-rmz/OSCE-Feedback-Evaluator.git
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### 3. Install Dependencies
|
### 3. Install Dependencies
|
||||||
|
|
||||||
Make sure you are in the root directory where requirements.txt is located:
|
Make sure you are in the root directory where requirements.txt is located:
|
||||||
Bash
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -48,3 +50,15 @@ python3 osce_pipeline.py
|
|||||||
### 5. Process your Data
|
### 5. Process your Data
|
||||||
|
|
||||||
When prompted, paste the path of the directory containing the files the app should work on.
|
When prompted, paste the path of the directory containing the files the app should work on.
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
### 📝 Notes
|
||||||
|
|
||||||
|
Make sure your .env file is set up inside the /pipeline folder if the script requires API keys or specific credentials.
|
||||||
|
|
||||||
|
Use the .env.example file as a template if you're setting this up for the first time.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
1539
exp/2moodle.py
Normal file
1539
exp/2moodle.py
Normal file
File diff suppressed because it is too large
Load Diff
1539
exp/app.old_inside_pipeline.py
Normal file
1539
exp/app.old_inside_pipeline.py
Normal file
File diff suppressed because it is too large
Load Diff
191
exp/feedback_bewertung.py
Normal file
191
exp/feedback_bewertung.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
# %% Feedback_Bewertung
|
||||||
|
import openai
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# OpenAI-compatible client for the evaluation server.
# The API key is read from the environment instead of being committed to the
# repository; API_KEY / BASE_URL are the variable names used by the project's
# env template. The previous endpoint stays as the BASE_URL fallback so existing
# setups keep working. If API_KEY is unset, None is passed and the SDK applies
# its own default lookup.
# NOTE(review): the key that used to be hard-coded here is in the git history
# and should be rotated.
client = openai.OpenAI(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL", "http://pluto/v1"),
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
EVAL_PROMPT = """
|
||||||
|
Du bist ein strenger, objektiver Bewertender.
|
||||||
|
Bewerte das folgende Feedback anhand der untenstehenden Kriterien.
|
||||||
|
|
||||||
|
Für jedes Kriterium musst du GENAU folgendes liefern:
|
||||||
|
|
||||||
|
- answer: true oder false
|
||||||
|
- explanation: eine kurze, klare Begründung auf Deutsch, basierend ausschließlich auf dem gegebenen Feedback
|
||||||
|
|
||||||
|
KRITERIEN:
|
||||||
|
1. Ist das Feedback zeitnah? (<48 Stunden)
|
||||||
|
2. Enthält das Feedback narrative Kommentare?
|
||||||
|
3. Ist der Ton positiv und/oder unterstützend?
|
||||||
|
4. Beschreibt das Feedback beobachtbares Verhalten in klarer, eindeutiger Sprache?
|
||||||
|
5. Verstärkt das Feedback, was gut gemacht wurde?
|
||||||
|
6. Zeigt das Feedback Bereiche auf, in denen Verbesserungen nötig sind?
|
||||||
|
7. Enthält das Feedback konkrete, spezifische Strategien zur Verbesserung?
|
||||||
|
8. Bezieht sich das Feedback auf definierte Standards (z. B. EPA, Leistungsniveaus)?
|
||||||
|
9. Passt der Anspruch/die Komplexität des Feedbacks zum Lernstand der Lernenden?
|
||||||
|
|
||||||
|
Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# Verdict for one rubric criterion, as requested by EVAL_PROMPT.
class CriterionResult(BaseModel):
    answer: bool | None  # true/false verdict; None is accepted as well
    explanation: str  # justification text (the prompt asks for German)
|
||||||
|
|
||||||
|
# Full evaluation of one feedback transcript: one CriterionResult per rubric
# criterion, in the same order as criteria 1-9 in EVAL_PROMPT. Passed as
# `text_format` so the response is parsed into this model.
class FeedbackEvaluation(BaseModel):
    timely: CriterionResult  # 1: given promptly (<48 hours)
    narrative_comments: CriterionResult  # 2: contains narrative comments
    positive_supportive_tone: CriterionResult  # 3: positive and/or supportive tone
    clear_language: CriterionResult  # 4: observable behaviour in clear language
    reinforces_strengths: CriterionResult  # 5: reinforces what went well
    identifies_improvements: CriterionResult  # 6: names areas needing improvement
    includes_strategies: CriterionResult  # 7: concrete improvement strategies
    relates_to_standards: CriterionResult  # 8: refers to defined standards
    congruent_with_learner_level: CriterionResult  # 9: matches the learner's level
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% Main
|
||||||
|
|
||||||
|
import json

# Directory holding the transcript .txt files to evaluate.
input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"

# Sorted so runs process (and write) the files in a reproducible order.
files = sorted(Path(input_dir).glob("*.txt"))

# Maps transcript file name -> parsed FeedbackEvaluation.
results = {}

for f in files:
    text = f.read_text(encoding="utf-8")

    # Structured-output call: the SDK parses the reply into FeedbackEvaluation.
    response = client.responses.parse(
        model="GPT-OSS-120B",
        input=[
            {"role": "system", "content": EVAL_PROMPT},
            {"role": "user", "content": text},
        ],
        text_format=FeedbackEvaluation,
    )

    # output_parsed is None when the reply could not be parsed (e.g. a
    # refusal). Storing None would make the final `.dict()` call fail and
    # discard every evaluation already collected, so skip such files.
    parsed = response.output_parsed
    if parsed is None:
        print(f"Keine auswertbare Antwort für {f.name} - übersprungen")
        continue

    results[f.name] = parsed

# Save results. ensure_ascii=False keeps German umlauts readable in the file
# instead of writing \uXXXX escapes.
with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
    json.dump(
        {name: evaluation.dict() for name, evaluation in results.items()},
        out,
        indent=4,
        ensure_ascii=False,
    )
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% Feedback_Bewertung_Moodle
|
||||||
|
import openai
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
# OpenAI-compatible client for the evaluation server.
# The API key is read from the environment instead of being committed to the
# repository; API_KEY / BASE_URL are the variable names used by the project's
# env template. The previous endpoint stays as the BASE_URL fallback so existing
# setups keep working. If API_KEY is unset, None is passed and the SDK applies
# its own default lookup.
# NOTE(review): the key that used to be hard-coded here is in the git history
# and should be rotated.
client = openai.OpenAI(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL", "http://pluto/v1"),
)
|
||||||
|
|
||||||
|
|
||||||
|
EVAL_PROMPT = """
|
||||||
|
Du bist ein strenger, objektiver Bewertender für medizinische Lehre.
|
||||||
|
Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics".
|
||||||
|
Für jedes Kriterium musst du GENAU folgendes liefern:
|
||||||
|
- score: Zahl von 1 (beste Bewertung) bis 5 (gescheitert)
|
||||||
|
- explanation: Ausführliche Begründung auf Deutsch mit:
|
||||||
|
a) Zitat aus dem Kursmaterial das das Kriterium definiert
|
||||||
|
b) Konkrete Analyse des Feedback-Textes mit Zitaten
|
||||||
|
c) Klare Verknüpfung zwischen Kursanforderung und Feedback-Umsetzung
|
||||||
|
|
||||||
|
KRITERIEN (basierend auf Kursmaterial):
|
||||||
|
1. KONKRETHEIT (Bild 4: Merkmale von Feedback)
|
||||||
|
"Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
|
||||||
|
Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
|
||||||
|
|
||||||
|
2. ICH-BOTSCHAFTEN (Feedbackregeln)
|
||||||
|
"Ein Feedback … wird in „Ich-Botschaften“ ausgedrückt. … ist nicht (ab)wertend."
|
||||||
|
Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
|
||||||
|
|
||||||
|
3. STRUKTUR (WWW/BEB-Prinzip)
|
||||||
|
WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
|
||||||
|
BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
|
||||||
|
Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
|
||||||
|
|
||||||
|
4. WERTSCHÄTZUNG (Bild 4: Merkmale von Feedback)
|
||||||
|
"Feedback ... auf Augenhöhe: Feedbackgeber und Feedbackempfänger gehen respektvoll und wertschätzend miteinander um."
|
||||||
|
Bewertung: Wird respektvoll und auf Augenhöhe kommuniziert?
|
||||||
|
|
||||||
|
5. VERBESSERUNGSEMPFEHLUNG (Feedbackregeln)
|
||||||
|
"Ein Feedback … endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
|
||||||
|
Bewertung: Gibt es konkrete, wertschätzende Handlungsempfehlungen?
|
||||||
|
|
||||||
|
SCORING-ANLEITUNG:
|
||||||
|
1 = Vollständige Umsetzung (exzellentes Beispiel)
|
||||||
|
2 = Gute Umsetzung mit minimalen Lücken
|
||||||
|
3 = Grundlegende Umsetzung mit signifikanten Mängeln
|
||||||
|
4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
|
||||||
|
5 = Keine erkennbare Umsetzung (kriterienwidrig)
|
||||||
|
|
||||||
|
Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
# Grade for one rubric criterion, as requested by EVAL_PROMPT.
class CriterionResult(BaseModel):
    score: int  # 1-5 (1=best, 5=failed)
    explanation: str  # justification text (the prompt asks for German)
|
||||||
|
# Full evaluation of one feedback transcript: one CriterionResult per rubric
# criterion, in the same order as criteria 1-5 in EVAL_PROMPT.
class FeedbackEvaluation(BaseModel):
    konkretes_feedback: CriterionResult  # 1: concreteness (observable examples)
    ich_botschaften: CriterionResult  # 2: first-person ("I") statements
    struktur: CriterionResult  # 3: WWW/BEB structure
    wertschaetzung: CriterionResult  # 4: respectful, appreciative tone
    verbesserungsempfehlung: CriterionResult  # 5: recommendation for improvement
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% Main
|
||||||
|
import json

# Directory holding the transcript .txt files to evaluate.
input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"

# Sorted so runs process (and write) the files in a reproducible order.
files = sorted(Path(input_dir).glob("*.txt"))

# Maps transcript file name -> validated FeedbackEvaluation.
results = {}

# EVAL_PROMPT tells the model to answer "according to the provided schema",
# but response_format={"type": "json_object"} transmits no schema, so the
# field names FeedbackEvaluation requires were never communicated and
# validation below could only succeed by accident. Append the model's JSON
# schema to the system message so the instruction is actually satisfiable.
system_prompt = (
    EVAL_PROMPT
    + "\nJSON-Schema:\n"
    + json.dumps(FeedbackEvaluation.schema(), ensure_ascii=False, indent=2)
)

for f in files:
    text = f.read_text(encoding="utf-8")

    response = client.chat.completions.create(
        model="GPT-OSS-120B",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        response_format={"type": "json_object"},
        temperature=0.1,
    )

    # Parse the JSON reply and validate it against the pydantic model.
    parsed_response = json.loads(response.choices[0].message.content)
    results[f.name] = FeedbackEvaluation(**parsed_response)

# Save results; ensure_ascii=False keeps German umlauts readable.
with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
    json.dump(
        {name: evaluation.dict() for name, evaluation in results.items()},
        out,
        indent=4,
        ensure_ascii=False,
    )
|
||||||
|
|
||||||
|
##
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
API_KEY=YOUR_API_KEY
|
API_KEY=YOUR_API_KEY
|
||||||
BASE_URL=SERVER_ADDRESS
|
BASE_URL=SERVER_ADDRESS
|
||||||
|
|
||||||
|
# Optional
|
||||||
|
OUTPUT_ADMIN_DIR=admin_dir
|
||||||
|
OUTPUT_STUDENT_DIR=studi_dir
|
||||||
|
|||||||
0
pipeline/__init__.py
Normal file
0
pipeline/__init__.py
Normal file
404
pipeline/feedbacker.py
Normal file
404
pipeline/feedbacker.py
Normal file
@@ -0,0 +1,404 @@
|
|||||||
|
|
||||||
|
# %% Isabella
|
||||||
|
|
||||||
|
|
||||||
|
import openai
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import datetime
|
||||||
|
import time
|
||||||
|
import json as json_module
|
||||||
|
import csv
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import math
|
||||||
|
|
||||||
|
# OpenAI-compatible client used for every evaluation request.
# Credentials come from the environment when available; API_KEY / BASE_URL are
# the variable names used by the project's env template. The former "xxx"
# placeholders remain as fallbacks, so behaviour is unchanged when the
# variables are not set.
# NOTE(review): assumes the .env values are loaded into the process
# environment before this module runs - confirm against the pipeline entry
# point.
client = openai.OpenAI(
    api_key=os.environ.get("API_KEY", "xxx"),
    base_url=os.environ.get("BASE_URL", "xxx"),
)
|
||||||
|
|
||||||
|
# System prompt (German) for the nine-criterion rubric (A1, A2, B1, B2, C1, D1,
# D2, E1, F1). Each criterion is graded 1 (best) to 5 (failed); the reply must
# be JSON with exactly the structure shown at the end of the prompt, which
# mirrors the EvaluationResult model.
# The D1 heading is spelled "ZUKUNFTSORIENTIERTE" (it was misspelled
# "ZUKUNGSORIENTIERTE"), matching the criterion's own description.
EVAL_PROMPT = '''
Du bist ein strenger, objektiver Bewertender für medizinische Lehre. Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics". KRITERIEN (basierend auf Kursmaterial): A1 PERSPEKTIVE (Ich-Botschaften) Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt? A2 RESPEKT & WERTFREIHEIT Bewertung: Wird respektvoll und wertfrei kommuniziert? B1 KONKRETHEIT "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich." Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen? B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden? C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip) WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..." BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..." Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)? D1 ZUKUNFTSORIENTIERTE EMPFEHLUNG Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen, die wertschätzend formuliert sind? D2 WERTSCHÄTZENDER ABSCHLUSS Bewertung: Schließt das Feedback wertschätzend ab? E1 KOMMUNIKATIONSEBENEN "Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell" Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen? F1 FÖRDERUNG VON REFLEXION Bewertung: Fördert das Feedback die Reflexion und das Lernen?

SCORING-ANLEITUNG:
1 = Vollständige Umsetzung (exzellentes Beispiel)
2 = Gute Umsetzung mit minimalen Lücken
3 = Grundlegende Umsetzung mit signifikanten Mängeln
4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
5 = Keine erkennbare Umsetzung (kriterienwidrig)

WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
{
"scores": {
"A1": {"score": 1, "justification": "Begründung hier"},
"A2": {"score": 1, "justification": "Begründung hier"},
"B1": {"score": 1, "justification": "Begründung hier"},
"B2": {"score": 1, "justification": "Begründung hier"},
"C1": {"score": 1, "justification": "Begründung hier"},
"D1": {"score": 1, "justification": "Begründung hier"},
"D2": {"score": 1, "justification": "Begründung hier"},
"E1": {"score": 1, "justification": "Begründung hier"},
"F1": {"score": 1, "justification": "Begründung hier"}
},
"total_score": 0,
"quality_level": "Beispiel-Qualitätsstufe",
"strengths": ["Stärke 1", "Stärke 2"],
"weaknesses": ["Schwäche 1", "Schwäche 2"],
"improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
}
'''
|
||||||
|
|
||||||
|
# Grade for a single rubric criterion, matching one entry of "scores" in the
# JSON structure requested by EVAL_PROMPT.
class ScoreItem(BaseModel):
    score: int  # 1-5 (1=excellent, 5=failed)
    justification: str  # justification text for the grade
|
||||||
|
|
||||||
|
# Complete evaluation of one transcript; the fields mirror the top-level keys
# of the JSON structure requested by EVAL_PROMPT.
class EvaluationResult(BaseModel):
    scores: dict[str, ScoreItem]  # criterion id (e.g. "A1") -> grade
    total_score: int  # total as reported in the response
    quality_level: str  # quality label; the main loop may overwrite it
    strengths: list[str]  # listed in the report under "Stärken"
    weaknesses: list[str]  # listed in the report under "Schwächen"
    improvement_suggestions: list[str]  # listed under "Verbesserungsvorschläge"
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
|
# %% Main
|
||||||
|
|
||||||
|
# Maps the rounded average score of an evaluation to the quality label shown
# in the report and CSV. Key 0 carries the label for a faulty/unknown result.
QUALITY_LEVEL_MAP = {
    1: "Exzellent (1)",
    2: "Gut (2)",
    3: "Befriedigend (3)",
    4: "Ausreichend (4)",
    5: "Mangelhaft/Ungenügend (5)",
    0: "Fehlerhaft/Unbekannt"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Transcript source and report destination. Both paths are fixed here; edit
# them to point the run at a different data set.
input_dir = "./cruscloud/Teil3/Transkripte/"
output_dir = "./cruscloud/Teil3/Evaluations_moodle_isabella2"
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Begin a fresh human-readable timing log: a dated banner followed by a rule.
timing_log_path = Path(output_dir) / "evaluation_timing.log"
log_banner = (
    f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    + "=" * 80
    + "\n\n"
)
with open(timing_log_path, "w", encoding="utf-8") as log:
    log.write(log_banner)

# Begin a fresh timing CSV containing only the header row: the per-file
# bookkeeping columns first, then one score column per criterion.
csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
csv_header = [
    "Filename",
    "Total_Time_sec",
    "API_Evaluation_Time_sec",
    "Start_Time",
    "End_Time",
    "Status",
    "Average_Score",
    "Quality_Level",
] + [
    f"{criterion}_Score"
    for criterion in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")
]
with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=",")
    csv_writer.writerow(csv_header)
|
||||||
|
|
||||||
|
# Criterion ids in report/CSV column order, mapped to the headings printed in
# the text report. Single source of truth for the nine criteria.
CRITERION_LABELS = {
    "A1": "A1 PERSPEKTIVE (Ich-Botschaften)",
    "A2": "A2 RESPEKT & WERTFREIHEIT",
    "B1": "B1 KONKRETHEIT",
    "B2": "B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION",
    "C1": "C1 STRUKTURIERTE LOGIK (WWW/BEB)",
    "D1": "D1 ZUKUNFTSORIENTIERTE EMPFEHLUNG",
    "D2": "D2 WERTSCHÄTZENDER ABSCHLUSS",
    "E1": "E1 KOMMUNIKATIONSEBENEN",
    "F1": "F1 FÖRDERUNG VON REFLEXION",
}


def _failed_evaluation(justification, quality_level, weaknesses, suggestions):
    """Build a placeholder result (every criterion scored 5) for a failed run."""
    return EvaluationResult(
        scores={
            key: ScoreItem(score=5, justification=justification)
            for key in CRITERION_LABELS
        },
        total_score=0,
        quality_level=quality_level,
        strengths=["Bewertung fehlgeschlagen"],
        weaknesses=weaknesses,
        improvement_suggestions=suggestions,
    )


def _complete_payload(payload):
    """Return ``(completed, was_incomplete)`` for a decoded model reply.

    ``completed`` always contains every field EvaluationResult needs and an
    entry for each known criterion; anything the reply lacks is filled with an
    error placeholder. ``was_incomplete`` is True when something had to be
    filled in. Score entries for unknown criterion ids are dropped.
    """
    if not isinstance(payload, dict):
        payload = {}

    required_keys = (
        "scores", "total_score", "quality_level",
        "strengths", "weaknesses", "improvement_suggestions",
    )
    was_incomplete = any(key not in payload for key in required_keys)

    raw_scores = payload.get("scores")
    if not isinstance(raw_scores, dict):
        raw_scores = {}
    scores = {}
    for key in CRITERION_LABELS:
        if key in raw_scores:
            scores[key] = raw_scores[key]
        else:
            # A missing criterion would otherwise raise KeyError when the
            # report is rendered.
            was_incomplete = True
            scores[key] = {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}

    completed = {
        "scores": scores,
        "total_score": payload.get("total_score", 0),
        "quality_level": payload.get("quality_level") or "Fehlerhaft",
    }
    list_defaults = {
        "strengths": ["Strukturfehler in der Bewertung"],
        "weaknesses": ["Antwortstruktur nicht korrekt"],
        "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"],
    }
    for key, default in list_defaults.items():
        value = payload.get(key)
        if isinstance(value, list):
            completed[key] = value
        else:
            was_incomplete = True
            completed[key] = default
    return completed, was_incomplete


def _render_report(file_name, evaluation, rounded_average_score,
                   total_file_duration, eval_duration, local_processing_time):
    """Return the text report for one evaluated transcript."""
    parts = [
        "FEEDBACK-EVALUATION BERICHT\n",
        "============================\n",
        f"Eingabedatei: {file_name}\n",
        f"Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        'Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)\n',
        "\n",
        "VERARBEITUNGSZEITEN\n",
        "----------------------------------------\n",
        f"Gesamtverarbeitung: {total_file_duration:.2f} Sekunden\n",
        f"• API-Bewertungszeit: {eval_duration:.2f} Sekunden\n",
        f"• Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden\n",
        "\n\n",
        "KRITERIENBEWERTUNG\n",
        "----------------------------------------\n",
    ]
    for key, label in CRITERION_LABELS.items():
        item = evaluation.scores[key]
        parts.append(f"{label}: {item.score}/5\n")
        parts.append("(1=exzellent, 5=nicht bestanden)\n")
        parts.append(f"Begründung: {item.justification}\n\n")
    parts += [
        "GESAMTBEWERTUNG\n",
        "----------------------------------------\n",
        f"Durchschnittliche Bewertung: {rounded_average_score}/5\n",
        "(1=exzellent, 5=nicht bestanden)\n",
        "\n",
        f"Qualitätsstufe: {evaluation.quality_level}\n",
        "\n",
        "Stärken:\n",
    ]
    parts += [f"- {strength}\n" for strength in evaluation.strengths]
    parts.append("\nSchwächen:\n")
    parts += [f"- {weakness}\n" for weakness in evaluation.weaknesses]
    parts.append("\nVerbesserungsvorschläge:\n")
    parts += [f"- {suggestion}\n" for suggestion in evaluation.improvement_suggestions]
    return "".join(parts)


# Sorted so runs process the transcripts in a reproducible order.
files = sorted(Path(input_dir).glob("*.txt"))
results = {}
total_start = time.time()

for f in files:
    file_start = time.time()
    file_started_at = datetime.datetime.now()
    start_time_str = file_started_at.strftime('%Y-%m-%d %H:%M:%S')
    print(f"\n{'='*50}")
    print(f"Beginne Bewertung: {f.name}")
    print(f"Startzeit: {file_started_at.strftime('%H:%M:%S')}")

    # Read input text
    text = f.read_text(encoding="utf-8")

    status = "Success"
    eval_duration = 0.0  # stays 0.0 when the API call itself fails

    # All criteria are evaluated in one call with a strict JSON structure.
    try:
        eval_start_api = time.time()
        response = client.chat.completions.create(
            model="GPT-OSS-120B",
            messages=[
                {"role": "system", "content": EVAL_PROMPT},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
        )
        eval_duration = time.time() - eval_start_api

        print(f" • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")

        try:
            parsed_response = json_module.loads(response.choices[0].message.content)
        except json_module.JSONDecodeError as e:
            print(f" ! JSON-Decoding-Fehler: {e}")
            print(f" ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
            status = f"JSON Error: {str(e)}"
            evaluation = _failed_evaluation(
                f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}",
                "Fehlerhaft",
                ["Ungültiges JSON-Format"],
                ["Überprüfen Sie die Feedback-Struktur"],
            )
        else:
            parsed_response, was_incomplete = _complete_payload(parsed_response)
            if was_incomplete:
                print(f" ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
                status = "Partial Structure"
            evaluation = EvaluationResult(**parsed_response)

    except Exception as e:
        # Boundary handler: covers API failures and also validation errors
        # raised while building EvaluationResult, so one bad file does not
        # abort the whole batch.
        print(f" ! Unerwarteter Fehler: {str(e)}")
        status = f"API Error: {str(e)}"
        evaluation = _failed_evaluation(
            f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}",
            "Systemfehler",
            [f"Technischer Fehler: {str(e)}"],
            ["Kontaktieren Sie den Support"],
        )

    results[f.name] = evaluation

    # Average (not sum) of the criterion scores, rounded to a whole grade.
    all_scores = [item.score for item in evaluation.scores.values()]
    average_score = sum(all_scores) / len(all_scores) if all_scores else 5.0
    rounded_average_score = int(round(average_score))

    # Keep the error label for any run that did not succeed. The status
    # strings carry a detail suffix ("JSON Error: ...", "API Error: ..."), so
    # comparing against the bare prefixes never matched and failed runs were
    # relabelled with the grade-5 quality level.
    if status != "Success":
        final_quality_level = evaluation.quality_level
    else:
        final_quality_level = QUALITY_LEVEL_MAP.get(rounded_average_score, "Fehlerhaft/Unbekannt")
    evaluation.quality_level = final_quality_level

    # Total time for this file up to this point; local time is the total
    # minus the pure API wait, clamped at zero.
    total_file_duration = time.time() - file_start
    local_processing_time = max(total_file_duration - eval_duration, 0.0)

    report = _render_report(
        f.name,
        evaluation,
        rounded_average_score,
        total_file_duration,
        eval_duration,
        local_processing_time,
    )

    # Save report to output directory
    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(report)

    # Append this file's timing and score row to the CSV
    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=",")
        csv_writer.writerow(
            [
                f.name,
                f"{total_file_duration:.2f}",
                f"{eval_duration:.2f}",
                start_time_str,
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                status,
                rounded_average_score,
                evaluation.quality_level,
            ]
            + [evaluation.scores[key].score for key in CRITERION_LABELS]
        )

    # Append to the central timing log. "Start" records when processing of
    # this file began, not the moment the log line is written.
    with open(timing_log_path, "a", encoding="utf-8") as log:
        log.write(f"Datei: {f.name}\n")
        log.write(f"Start: {file_started_at.strftime('%H:%M:%S')}\n")
        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
        log.write("Detailierte Zeiten:\n")
        log.write(f" • API-Bewertung: {eval_duration:.2f} Sekunden\n")
        log.write(f" • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
        log.write("-"*50 + "\n\n")

    print(f"\nBewertungsbericht erstellt: {output_path}")
    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
    print(f"{'='*50}")

total_duration = time.time() - total_start
print(f"\n{'='*50}")
print(f"ALLE BEWERTUNGEN ABGESCHLOSSEN")
print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
# Skip the per-file average when no transcripts were found (avoids dividing
# by zero).
if files:
    print(f"Durchschnittliche Zeit pro Datei: {total_duration/len(files):.2f} Sekunden")
print(f"Bewertungsberichte gespeichert in: {output_dir}")
print(f"Timing-Log aktualisiert: {timing_log_path}")
print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
print(f"{'='*50}")
|
||||||
|
##
|
||||||
Reference in New Issue
Block a user