Misty-Rhetorik-Coach/old/test.py
2026-05-06 12:46:05 +00:00

68 lines
2 KiB
Python

import subprocess
import sys
import tempfile
import threading
import time

import numpy as np
import whisper

from config import RTSP_URL, MISTY_IP
def _pcm16_to_float32(raw):
    """Convert raw little-endian signed 16-bit PCM bytes to float32 samples.

    A trailing odd byte (an incomplete sample, possible when the stream is
    cut off mid-write) is dropped, because ``np.frombuffer`` rejects buffers
    whose size is not a multiple of the element size.

    Args:
        raw: ``bytes`` or ``bytearray`` holding s16le samples.

    Returns:
        A one-dimensional float32 array scaled by 1/32768.
    """
    usable = len(raw) - (len(raw) % 2)
    # "<i2" pins little-endian to match ffmpeg's "s16le" on any host.
    samples = np.frombuffer(memoryview(raw)[:usable], dtype="<i2")
    return samples.astype(np.float32) / 32768.0


def _stop_ffmpeg(process, grace_seconds=2.0):
    """Stop *process*, reap it and close its stdout pipe.

    Sends terminate first and escalates to kill if the process has not
    exited after *grace_seconds*. Safe to call more than once.
    """
    if process.poll() is None:
        process.terminate()
        try:
            process.wait(timeout=grace_seconds)
        except subprocess.TimeoutExpired:
            process.kill()
    # Always reap the child so no zombie is left behind.
    process.wait()
    if process.stdout is not None:
        process.stdout.close()


def run_test_inference():
    """Capture about 10 seconds of audio from RTSP_URL and print a transcript.

    Loads the Whisper "base" model, starts ffmpeg to pull the RTSP stream and
    convert it to 16 kHz mono s16le PCM on stdout, collects that PCM for up to
    10 seconds, and transcribes it with ``language="de"``. If no audio was
    received, the ffmpeg log is printed instead.

    Returns:
        None. All results are printed to stdout.

    Raises:
        OSError: if the ffmpeg executable cannot be started. Errors raised
            after ffmpeg has started are caught and printed.
    """
    capture_seconds = 10
    chunk_bytes = 3200  # 100 ms of 16 kHz mono 16-bit audio

    print("--- Finaler Test-Lauf (SDK-Struktur) ---")

    # Load the model
    print("Lade Whisper-Modell...")
    model = whisper.load_model("base")

    # ffmpeg command with more "patience" (analyzeduration & probesize);
    # this helps when Misty is slow to start the stream.
    command = [
        'ffmpeg',
        '-rtsp_transport', 'tcp',
        '-analyzeduration', '5000000',
        '-probesize', '5000000',
        '-i', RTSP_URL,
        '-ar', '16000',
        '-ac', '1',
        '-f', 's16le',
        '-',
    ]

    print(f"\nVersuche Verbindung zu: {RTSP_URL}")

    # ffmpeg logs go to a temporary file rather than a pipe: a pipe that is
    # not read during capture can fill up and block ffmpeg.
    with tempfile.TemporaryFile() as ffmpeg_log:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=ffmpeg_log
        )

        # read() below blocks until data arrives; the watchdog terminates
        # ffmpeg at the deadline so a stalled stream cannot hang the script.
        watchdog = threading.Timer(capture_seconds, process.terminate)
        watchdog.daemon = True

        pcm_bytes = bytearray()
        print("[MISTY HÖRT ZU] Sprich jetzt... (Sammle 10 Sekunden Audio)")
        start_time = time.monotonic()
        watchdog.start()

        try:
            # Simply collect audio for the capture window to fill the buffer.
            while time.monotonic() - start_time < capture_seconds:
                raw_chunk = process.stdout.read(chunk_bytes)
                if not raw_chunk:
                    # EOF: ffmpeg exited or was stopped by the watchdog.
                    break
                pcm_bytes.extend(raw_chunk)
                sys.stdout.write(".")
                sys.stdout.flush()

            watchdog.cancel()
            _stop_ffmpeg(process)

            if len(pcm_bytes) < 2:
                # Nothing usable arrived, so show ffmpeg's own log.
                ffmpeg_log.seek(0)
                log_text = ffmpeg_log.read().decode(errors="replace")
                print(f"\n❌ FFmpeg Fehler-Log:\n{log_text}")
                return

            print("\n\nAnalyse startet...")
            full_audio = _pcm16_to_float32(pcm_bytes)
            result = model.transcribe(full_audio, language="de")

            print("\n" + "="*40)
            print(f"ERGEBNIS: {result['text'].strip()}")
            print("="*40 + "\n")
        except Exception as e:
            print(f"Fehler: {e}")
        finally:
            watchdog.cancel()
            _stop_ffmpeg(process)
# Script entry point: run the one-shot capture/transcription test only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    run_test_inference()