WIP: fix: "Failed to fetch" auf Hetzner — Job-Polling statt Langläufer-Request #10
2 changed files with 161 additions and 23 deletions
|
|
@ -6,7 +6,11 @@ import { existsSync } from 'node:fs';
|
|||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { runClaude, type ClaudeFieldSpec } from './claudeRunner.ts';
|
||||
import {
|
||||
runClaude,
|
||||
type ClaudeFieldSpec,
|
||||
type ClaudeFormResponse,
|
||||
} from './claudeRunner.ts';
|
||||
|
||||
const PORT = Number(process.env.PORT ?? 3001);
|
||||
const HOST = process.env.HOST ?? '127.0.0.1';
|
||||
|
|
@ -37,6 +41,26 @@ const upload = multer({
|
|||
},
|
||||
});
|
||||
|
||||
// Long-running jobs: `claude -p` takes 30–120s or more. If the HTTP connection
// were held open that long, any reverse proxy (or a brief network hiccup)
// would cut it and the browser would only see "Failed to fetch". Instead,
// POST returns immediately with a job id and the frontend polls via GET.

/**
 * State of one background job, discriminated by `status`.
 *
 * - `pending`: the job has been registered and has not settled yet.
 * - `done`: the run finished; `result` carries its response.
 * - `error`: the run failed; `error` carries the failure message.
 *
 * `createdAt` is an epoch-millisecond timestamp (set from `Date.now()`).
 */
type Job =
  | { status: 'pending'; createdAt: number }
  | { status: 'done'; createdAt: number; result: ClaudeFormResponse }
  | { status: 'error'; createdAt: number; error: string };
|
||||
|
||||
/** In-memory job registry, keyed by job id. */
const jobs = new Map<string, Job>();

/** Maximum age of a registry entry (measured from its `createdAt`): 15 minutes. */
const JOB_TTL_MS = 15 * 60 * 1000;

// Sweep expired jobs once a minute so the map does not grow without bound.
// The timer is unref'd so that it alone never keeps the Node process alive.
setInterval(() => {
  const cutoff = Date.now() - JOB_TTL_MS;
  // Deleting the current entry while iterating a Map is safe.
  for (const [jobId, entry] of jobs) {
    if (entry.createdAt < cutoff) {
      jobs.delete(jobId);
    }
  }
}, 60 * 1000).unref();
|
||||
|
||||
const app = express();
|
||||
|
||||
app.post(
|
||||
|
|
@ -83,11 +107,12 @@ app.post(
|
|||
const tempDir = path.join(os.tmpdir(), `rentenv-${requestId}`);
|
||||
const formName = filenameForMime('form', formFile.mimetype);
|
||||
|
||||
// Quelldateien schreiben — Fehler hier sind echte 5xx (kein claude-Lauf).
|
||||
const sourceNames: string[] = [];
|
||||
try {
|
||||
await mkdir(tempDir, { recursive: true });
|
||||
await writeFile(path.join(tempDir, formName), formFile.buffer);
|
||||
|
||||
const sourceNames: string[] = [];
|
||||
for (let i = 0; i < sourceFiles.length; i++) {
|
||||
const f = sourceFiles[i];
|
||||
const name = filenameForMime(`source_${i + 1}`, f.mimetype);
|
||||
|
|
@ -98,28 +123,72 @@ app.post(
|
|||
if (sourceText.length > 0) {
|
||||
await writeFile(path.join(tempDir, 'source_text.txt'), sourceText);
|
||||
}
|
||||
} catch (e: unknown) {
|
||||
if (existsSync(tempDir)) {
|
||||
rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
next(e);
|
||||
return;
|
||||
}
|
||||
|
||||
const result = await runClaude(tempDir, {
|
||||
// Job registrieren und SOFORT antworten — claude läuft im Hintergrund.
|
||||
jobs.set(requestId, { status: 'pending', createdAt: Date.now() });
|
||||
res.status(202).json({ jobId: requestId });
|
||||
|
||||
runClaude(tempDir, {
|
||||
formFilename: formName,
|
||||
sourceFilenames: sourceNames,
|
||||
hasSourceText: sourceText.length > 0,
|
||||
fields: fieldSpecs,
|
||||
})
|
||||
.then((result) => {
|
||||
jobs.set(requestId, {
|
||||
status: 'done',
|
||||
createdAt: Date.now(),
|
||||
result,
|
||||
});
|
||||
res.json(result);
|
||||
} catch (e: unknown) {
|
||||
next(e);
|
||||
} finally {
|
||||
})
|
||||
.catch((e: unknown) => {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
console.error('[claude job error]', requestId, msg);
|
||||
jobs.set(requestId, {
|
||||
status: 'error',
|
||||
createdAt: Date.now(),
|
||||
error: msg,
|
||||
});
|
||||
})
|
||||
.finally(() => {
|
||||
if (existsSync(tempDir)) {
|
||||
rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
// Status/result of a job. Deliberately short requests -> proxy-safe.
//
// Responses:
//   404 { error }                          – no entry for this id in the registry
//   200 { status: 'pending' }              – job has not settled yet
//   200 { status: 'error', error, details } – job failed; `details` is the stored message
//   200 { status: 'done', result }         – job finished; `result` is the stored response
app.get('/api/process/:jobId', (req: Request, res: Response) => {
  const job = jobs.get(req.params.jobId);

  if (!job) {
    res
      .status(404)
      .json({ error: 'Job nicht gefunden (abgelaufen oder Server neugestartet).' });
    return;
  }

  // Switch on the discriminant so each branch sees the narrowed variant.
  switch (job.status) {
    case 'pending':
      res.json({ status: 'pending' });
      return;
    case 'error':
      res.json({ status: 'error', error: 'Claude CLI failed', details: job.error });
      return;
    case 'done':
      res.json({ status: 'done', result: job.result });
      return;
  }
});
|
||||
|
||||
// Express error-handling middleware (identified by its four-argument
// signature). Logs the error message and answers with a single 500 JSON body.
//
// Exactly one response is sent: calling `res.status(...).json(...)` twice
// would fail on the second call because the headers are already out.
// If a response has already started, the error is handed to Express's
// default handler instead of attempting to write a second response.
app.use((err: unknown, _req: Request, res: Response, next: NextFunction) => {
  const msg = err instanceof Error ? err.message : String(err);
  console.error('[server error]', msg);

  if (res.headersSent) {
    next(err);
    return;
  }

  res.status(500).json({ error: 'Serverfehler beim Vorbereiten', details: msg });
});
|
||||
|
||||
app.get('/api/health', (_req, res) => {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,78 @@ export async function processDocuments(
|
|||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(await errorMessageFrom(res));
|
||||
}
|
||||
|
||||
const { jobId } = (await res.json()) as { jobId?: string };
|
||||
if (!jobId) throw new Error('Server hat keine Job-ID zurückgegeben.');
|
||||
|
||||
return pollJob(jobId);
|
||||
}
|
||||
|
||||
/** Pause between two status requests. */
const POLL_INTERVAL_MS = 2000;
/** Overall budget for waiting on one job before giving up. */
const MAX_WAIT_MS = 6 * 60 * 1000;
/** Number of back-to-back transient failures tolerated before aborting. */
const MAX_CONSECUTIVE_NETWORK_ERRORS = 8;
/**
 * Gateway statuses treated as transient while polling: they are retried on
 * the next tick instead of failing the analysis.
 */
const TRANSIENT_GATEWAY_STATUSES: readonly number[] = [502, 503, 504];

/** Shape of the JSON body returned by the job-status endpoint. */
type JobPollPayload = {
  status?: 'pending' | 'done' | 'error';
  result?: FormResponse;
  error?: string;
  details?: string;
};

/**
 * Polls the job-status endpoint until the job settles.
 *
 * Requests are short, so a proxy timeout or a brief network hiccup no longer
 * kills the whole analysis; the poll is simply retried on the next tick.
 * Transient failures are: a rejected `fetch`, a 502/503/504 response, and a
 * response body that cannot be read or parsed as JSON. They are counted
 * consecutively and reset by the next well-formed response.
 *
 * @param jobId - Job id returned by the POST that started the analysis.
 * @returns The job's result once the server reports `status: 'done'`.
 * @throws Error when the wait exceeds `MAX_WAIT_MS`, when
 *   `MAX_CONSECUTIVE_NETWORK_ERRORS` transient failures occur in a row, when
 *   the server answers 404 or another non-OK status, when the job reports
 *   `status: 'error'`, or when the payload has an unexpected shape.
 */
async function pollJob(jobId: string): Promise<FormResponse> {
  const start = Date.now();
  let networkErrors = 0;

  // Records one more consecutive transient failure and aborts once the
  // budget is exhausted; otherwise the caller just continues polling.
  const noteTransientFailure = (): void => {
    networkErrors += 1;
    if (networkErrors >= MAX_CONSECUTIVE_NETWORK_ERRORS) {
      throw new Error(
        'Verbindung zum Server verloren. Läuft der Backend-Container noch?'
      );
    }
  };

  while (true) {
    if (Date.now() - start > MAX_WAIT_MS) {
      throw new Error('Zeitüberschreitung beim Warten auf die Analyse.');
    }
    await delay(POLL_INTERVAL_MS);

    let res: Response;
    try {
      res = await fetch(`/api/process/${encodeURIComponent(jobId)}`);
    } catch {
      // "Failed to fetch" while polling = transient -> keep trying.
      noteTransientFailure();
      continue;
    }

    // Gateway errors are handled like a dropped connection rather than as a
    // final verdict on the job.
    if (TRANSIENT_GATEWAY_STATUSES.includes(res.status)) {
      noteTransientFailure();
      continue;
    }

    if (res.status === 404) {
      throw new Error(
        'Analyse-Job nicht mehr vorhanden (Server neugestartet?). Bitte erneut versuchen.'
      );
    }
    if (!res.ok) {
      throw new Error(await errorMessageFrom(res));
    }

    let data: JobPollPayload;
    try {
      data = (await res.json()) as JobPollPayload;
    } catch {
      // Body cut off mid-transfer or not JSON: retry instead of surfacing a
      // raw parse error.
      noteTransientFailure();
      continue;
    }

    // A well-formed answer arrived; the failure streak is over.
    networkErrors = 0;

    if (data.status === 'pending') continue;
    if (data.status === 'done' && data.result) return data.result;
    if (data.status === 'error') {
      let message = data.error ?? 'Claude CLI failed';
      if (data.details) message += ` — ${data.details}`;
      throw new Error(message);
    }
    throw new Error('Unerwartete Antwort vom Server.');
  }
}
|
||||
|
||||
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds,
 * scheduled via `setTimeout`.
 *
 * @param ms - Delay in milliseconds passed to `setTimeout`.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
async function errorMessageFrom(res: Response): Promise<string> {
|
||||
let message = `Server antwortete mit ${res.status}`;
|
||||
try {
|
||||
const data = await res.json();
|
||||
|
|
@ -41,8 +113,5 @@ export async function processDocuments(
|
|||
} catch {
|
||||
// fall through
|
||||
}
|
||||
throw new Error(message);
|
||||
}
|
||||
|
||||
return (await res.json()) as FormResponse;
|
||||
return message;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue