| """ |
| Stateless Interview Chatbot Backend for HTML Frontend |
| All state management happens in the HTML/localStorage. |
| This backend only processes requests and returns responses. |
| |
| NOW WITH CONTEXT MANAGEMENT: |
| - Automatically creates summaries when approaching token limits |
| - Keeps recent messages + summary of older ones |
| - Interviewer can continue indefinitely without hitting context limits |
| """ |
|
|
| import os |
| import gradio as gr |
| from datetime import datetime |
| from openai import OpenAI |
| from google import genai |
| from github import Github |
| from slugify import slugify |
| import github |
|
|
| |
# Interviewer LLM: any OpenAI-compatible chat-completions endpoint.
INTERVIEWER_BASE_URL = os.getenv("INTERVIEWER_BASE_URL", "http://localhost:8000/v1")
INTERVIEWER_API_KEY = os.getenv("INTERVIEWER_API_KEY", "")
INTERVIEWER_MODEL = os.getenv("INTERVIEWER_MODEL", "gpt-4")
# Gemini is used only for article generation (see generate_article).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
# Target repository/branch for submitted draft articles.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
GITHUB_REPO = os.getenv("GITHUB_REPO", "")
GITHUB_BRANCH = os.getenv("GITHUB_BRANCH", "main")


# Context-management tuning: once the estimated prompt size passes
# MAX_CONTEXT_TOKENS, older messages get summarized; the most recent
# KEEP_RECENT_MESSAGES messages are always sent verbatim.
MAX_CONTEXT_TOKENS = 25000
KEEP_RECENT_MESSAGES = 20
|
|
# Canned opening message, served verbatim by the get_initial_greeting endpoint.
INITIAL_GREETING = """Hello! I'm here to help you share your project story with the community.

**Before we begin:**

I'll be using AI to conduct this interview and organize your responses into a well-structured article. The article will be submitted to a GitHub repository for review.

**To get started, please share:**
1. **Your project name** (or working title)
2. **A brief confirmation** that you're okay with AI helping to organize and write up this interview

Once I have that, we'll dive into your project's journey—from the initial spark, through challenges and decisions, to the real-world impact you've created!"""
|
|
|
|
def load_interview_instructions() -> str:
    """Load the interviewer's system instructions from workflow_instructions.md.

    The file is looked up next to this module. Returns an empty string when
    it does not exist, so the chat endpoint degrades gracefully instead of
    crashing on a missing file.
    """
    # NOTE: docstring previously said "article generation" — that was a
    # copy-paste from load_article_instructions; this loads the interview
    # workflow prompt.
    instructions_path = os.path.join(os.path.dirname(__file__), "workflow_instructions.md")
    if os.path.exists(instructions_path):
        with open(instructions_path, "r", encoding="utf-8") as f:
            return f.read()
    return ""
|
|
|
|
def load_article_instructions() -> str:
    """Load extra editorial instructions for article generation.

    Reads article_instructions.md from the directory containing this module;
    an empty string is returned when the file is absent.
    """
    path = os.path.join(os.path.dirname(__file__), "article_instructions.md")
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except FileNotFoundError:
        return ""
|
|
|
|
# Template for generate_article(). Placeholders filled via str.format():
#   {extra_instructions} -- optional guidance loaded from article_instructions.md
#   {transcript}         -- the full interview transcript in markdown
ARTICLE_GENERATION_PROMPT = """You are an expert editor who transforms interview transcripts into compelling case study articles.
Based on the following interview conversation, create a well-structured markdown article that tells the story of this project.
{extra_instructions}
**Interview Transcript:**
{transcript}
Generate the article in markdown format. Make it informative, inspiring, and practical for readers who might face similar challenges."""
|
|
|
|
def estimate_tokens(text: str) -> int:
    """Cheap token-count approximation: about one token per 4 characters."""
    char_count = len(text)
    return char_count // 4
|
|
|
|
def create_summary_with_context(previous_summary: str, new_messages: list[dict]) -> str:
    """
    Fold *new_messages* into *previous_summary* via the interviewer model.

    Updating one rolling summary (rather than summarizing from scratch)
    avoids losing context captured in earlier summarization passes. On any
    failure the previous summary is returned with a generic continuation
    note appended, so the caller always gets something usable.
    """
    try:
        llm = OpenAI(
            base_url=INTERVIEWER_BASE_URL,
            api_key=INTERVIEWER_API_KEY,
        )

        # Render the new turns as a plain-text transcript.
        turns = []
        for entry in new_messages:
            speaker = "Interviewer" if entry["role"] == "assistant" else "Interviewee"
            turns.append(f"{speaker}: {entry['content']}\n\n")
        transcript = "".join(turns)

        summary_prompt = f"""You previously created this summary of an interview:

{previous_summary}

Now update it to include the following additional conversation that happened after:

{transcript}

Provide an updated comprehensive summary that:
- Preserves all key information from the previous summary
- Integrates the new conversation details
- Maintains all facts, decisions, challenges, solutions, and metrics
- Keeps it detailed but concise"""

        # Low temperature: summarization should be faithful, not creative.
        reply = llm.chat.completions.create(
            model=INTERVIEWER_MODEL,
            messages=[{"role": "user", "content": summary_prompt}],
            max_tokens=1500,
            temperature=0.3,
        )

        summary = reply.choices[0].message.content
        print(f"✓ Updated summary ({estimate_tokens(summary)} tokens)")
        return summary

    except Exception as e:
        print(f"Summary update failed: {e}")
        # Best-effort fallback: keep the existing summary intact.
        return previous_summary + "\n\nAdditional context: Continued detailed discussion of the project."
|
|
|
|
def create_summary(history: list[dict]) -> str:
    """
    Condense *history* into a single summary via the interviewer model.

    Used for the first summarization pass, trading token count for a
    comprehensive recap. A generic placeholder summary is returned when
    the model call fails, so the interview can still continue.
    """
    try:
        llm = OpenAI(
            base_url=INTERVIEWER_BASE_URL,
            api_key=INTERVIEWER_API_KEY,
        )

        # Flatten the message dicts into a labeled transcript.
        turns = []
        for entry in history:
            speaker = "Interviewer" if entry["role"] == "assistant" else "Interviewee"
            turns.append(f"{speaker}: {entry['content']}\n\n")
        transcript = "".join(turns)

        summary_prompt = f"""Summarize this interview conversation comprehensively. Preserve:
- Project name and key details
- All technical challenges and solutions discussed
- Important decisions and their rationale
- Metrics, outcomes, and impact mentioned
- Any specific technologies, tools, or frameworks
- Timeline and context information

Keep the summary detailed enough that the interviewer can continue naturally.

CONVERSATION:
{transcript}

Provide a comprehensive summary:"""

        # Low temperature: we want a faithful recap, not invention.
        reply = llm.chat.completions.create(
            model=INTERVIEWER_MODEL,
            messages=[{"role": "user", "content": summary_prompt}],
            max_tokens=1500,
            temperature=0.3,
        )

        summary = reply.choices[0].message.content
        print(f"✓ Created summary ({estimate_tokens(summary)} tokens)")
        return summary

    except Exception as e:
        print(f"Summary creation failed: {e}")
        # Fallback keeps the pipeline alive with a minimal stand-in.
        return "Previous conversation covered project details and initial discussion."
|
|
|
|
def chat(history: list[dict], user_message: str) -> dict:
    """
    Process a chat message and return updated history.
    Stateless - all state comes from client.

    SMART CONTEXT MANAGEMENT:
    - Monitors token count
    - When approaching limit, creates ONE summary and stores it in history
    - Summary stored as special message: {"role": "system", "content": "...", "_type": "summary"}
    - On subsequent calls, reuses existing summary instead of re-summarizing
    - Periodically re-summarizes when recent messages grow too long

    Args:
        history: List of message dicts with 'role' and 'content'
        user_message: New message from user

    Returns:
        dict with 'history' and 'error' (if any)
    """
    try:
        if not INTERVIEWER_API_KEY or not INTERVIEWER_BASE_URL:
            return {
                "history": history,
                "error": "Interviewer API not configured"
            }

        # Work on a copy so the caller's list is never mutated; the new
        # user turn is appended before any token accounting.
        new_history = history.copy() if history else []
        new_history.append({"role": "user", "content": user_message})

        client = OpenAI(
            base_url=INTERVIEWER_BASE_URL,
            api_key=INTERVIEWER_API_KEY,
        )

        system_instructions = load_interview_instructions()

        # Locate a previously stored summary marker, if any (first match wins).
        existing_summary = None
        summary_index = -1
        for i, msg in enumerate(new_history):
            if msg.get("_type") == "summary":
                existing_summary = msg["content"]
                summary_index = i
                break

        # Rough prompt size if everything were sent verbatim; summary
        # markers are excluded because they are injected separately below.
        total_tokens = estimate_tokens(system_instructions)
        for msg in new_history:
            if msg.get("_type") != "summary":
                total_tokens += estimate_tokens(msg["content"])

        print(f"Total tokens: ~{total_tokens} (limit: {MAX_CONTEXT_TOKENS})")
        if existing_summary:
            print(f" Found existing summary at index {summary_index}")

        # Prompt always starts with the interviewer system instructions.
        messages = [{"role": "system", "content": system_instructions}]

        if existing_summary:
            # Messages that arrived after the stored summary was created.
            messages_after_summary = [m for i, m in enumerate(new_history)
                if i > summary_index and m.get("_type") != "summary"]

            tokens_after_summary = sum(estimate_tokens(m["content"]) for m in messages_after_summary)

            # Re-summarize once post-summary traffic exceeds 60% of the budget.
            if tokens_after_summary > MAX_CONTEXT_TOKENS * 0.6:
                print(f"⚠ Re-summarizing: {tokens_after_summary} tokens after previous summary")

                if len(messages_after_summary) > KEEP_RECENT_MESSAGES:
                    # Fold everything except the most recent messages into
                    # an updated rolling summary.
                    old_msgs_to_summarize = messages_after_summary[:-KEEP_RECENT_MESSAGES]
                    recent_messages = messages_after_summary[-KEEP_RECENT_MESSAGES:]

                    new_summary = create_summary_with_context(existing_summary, old_msgs_to_summarize)

                    # Drop the stale summary marker from the stored history...
                    new_history = [m for i, m in enumerate(new_history) if i != summary_index]

                    # ...and insert the refreshed one near the top (after the
                    # opening exchange) so later calls find it early.
                    insert_pos = min(2, len(new_history))
                    new_history.insert(insert_pos, {
                        "role": "system",
                        "content": new_summary,
                        "_type": "summary",
                        "_summarized_count": len(old_msgs_to_summarize)
                    })

                    messages.append({
                        "role": "system",
                        "content": f"""CONVERSATION SUMMARY (updated):

{new_summary}

---

Continue the interview based on this context and recent messages below."""
                    })

                    # Recent turns go into the prompt verbatim.
                    for msg in recent_messages:
                        messages.append({"role": msg["role"], "content": msg["content"]})

                    print(f"✓ Re-summarized {len(old_msgs_to_summarize)} messages, keeping {len(recent_messages)} recent")
                else:
                    # Too few messages to split off a "recent" tail; send the
                    # existing summary plus everything after it.
                    messages.append({
                        "role": "system",
                        "content": f"""PREVIOUS CONVERSATION SUMMARY:

{existing_summary}

---

Continue the interview based on this context and recent messages below."""
                    })

                    for msg in messages_after_summary:
                        messages.append({"role": msg["role"], "content": msg["content"]})
            else:
                # Still under budget: reuse the stored summary unchanged.
                print(f"✓ Reusing existing summary ({tokens_after_summary} tokens after summary)")

                messages.append({
                    "role": "system",
                    "content": f"""PREVIOUS CONVERSATION SUMMARY:

{existing_summary}

---

Continue the interview based on this context and recent messages below."""
                })

                for msg in messages_after_summary:
                    messages.append({"role": msg["role"], "content": msg["content"]})

        elif total_tokens > MAX_CONTEXT_TOKENS and len(new_history) > KEEP_RECENT_MESSAGES:
            # No summary yet and over budget: first summarization pass.
            old_messages = new_history[:-KEEP_RECENT_MESSAGES]
            recent_messages = new_history[-KEEP_RECENT_MESSAGES:]

            print(f"⚠ First summarization! Summarizing {len(old_messages)} older messages...")

            summary = create_summary(old_messages)

            # Store the summary marker near the top of history for reuse.
            insert_pos = min(2, len(new_history))
            new_history.insert(insert_pos, {
                "role": "system",
                "content": summary,
                "_type": "summary",
                "_summarized_count": len(old_messages)
            })

            messages.append({
                "role": "system",
                "content": f"""PREVIOUS CONVERSATION SUMMARY:

{summary}

---

You are now continuing the interview. The summary above covers earlier discussion.
Continue naturally based on this context and the recent messages below."""
            })

            for msg in recent_messages:
                messages.append({"role": msg["role"], "content": msg["content"]})

            # Log the post-summarization prompt size for observability.
            new_token_estimate = (
                estimate_tokens(system_instructions) +
                estimate_tokens(summary) +
                sum(estimate_tokens(m["content"]) for m in recent_messages)
            )
            print(f"✓ After first summary: ~{new_token_estimate} tokens")

        else:
            # Small conversation: send everything verbatim (minus any
            # summary markers, which only exist for bookkeeping).
            for msg in new_history:
                if msg.get("_type") != "summary":
                    messages.append({"role": msg["role"], "content": msg["content"]})

        response = client.chat.completions.create(
            model=INTERVIEWER_MODEL,
            messages=messages,
            max_tokens=1024,
            temperature=0.7,
        )

        assistant_message = response.choices[0].message.content
        new_history.append({"role": "assistant", "content": assistant_message})

        return {
            "history": new_history,
            "error": None
        }

    except Exception as e:
        error_msg = str(e)

        print(f"Chat error: {error_msg}")

        # Map common failure modes to friendlier messages by substring
        # matching on the exception text; original history is returned
        # untouched so the client loses nothing.
        if "quota" in error_msg.lower() or "rate" in error_msg.lower() or "limit" in error_msg.lower() or "429" in error_msg:
            return {
                "history": history,
                "error": "AI quota exceeded. Please try again after 8 PM today or tomorrow morning."
            }
        elif "401" in error_msg or "authentication" in error_msg.lower() or "unauthorized" in error_msg.lower():
            return {
                "history": history,
                "error": "Authentication failed. API key may be invalid or expired."
            }
        elif "400" in error_msg:
            return {
                "history": history,
                "error": "AI service error. The AI may be temporarily unavailable. Please try again later."
            }
        elif "timeout" in error_msg.lower() or "timed out" in error_msg.lower():
            return {
                "history": history,
                "error": "Request timed out. The AI service may be busy. Please try again in a moment."
            }
        elif "connection" in error_msg.lower() or "network" in error_msg.lower():
            return {
                "history": history,
                "error": "Connection failed. Please check your internet connection and try again."
            }
        return {
            "history": history,
            "error": f"AI temporarily unavailable. Please try again after 8 PM today or tomorrow. (Error: {error_msg[:100]})"
        }
|
|
|
|
def generate_article(history: list[dict]) -> dict:
    """
    Turn a completed interview history into a markdown case-study article.

    NOTE: Gemini's 2M-token context window means the transcript never needs
    summarization here, unlike the chat endpoint.

    Args:
        history: Complete conversation history

    Returns:
        dict with 'article' and 'error' (if any)
    """
    try:
        # Guard clauses: missing key or an interview too short to be useful.
        if not GEMINI_API_KEY:
            return {
                "article": None,
                "error": "Gemini API key not configured"
            }

        if len(history) < 4:
            return {
                "article": None,
                "error": "Please have a longer interview before generating article"
            }

        client = genai.Client(api_key=GEMINI_API_KEY)

        # Flatten the message dicts into a bold-labeled markdown transcript.
        parts = []
        for turn in history:
            speaker = "Interviewer" if turn["role"] == "assistant" else "Interviewee"
            parts.append(f"**{speaker}:** {turn['content']}\n\n")
        transcript = "".join(parts)

        extra_instructions = load_article_instructions()
        if not extra_instructions:
            extra_instructions = "Use best practices for case study writing."

        prompt = ARTICLE_GENERATION_PROMPT.format(
            transcript=transcript,
            extra_instructions=extra_instructions
        )

        response = client.models.generate_content(
            model="gemini-pro-latest",
            contents=prompt
        )

        return {
            "article": response.text,
            "error": None
        }

    except Exception as e:
        return {
            "article": None,
            "error": f"Failed to generate article: {str(e)}"
        }
|
|
|
|
def submit_article(article_content: str) -> dict:
    """
    Submit a markdown article as a draft file to the configured GitHub repo.

    The file lands under ``_draft/`` named ``YYYY-MM-DD-<slug>.md``, where the
    slug is taken from the article's first H1 heading (or a time-based
    fallback), with simple YAML front matter prepended.

    Args:
        article_content: The markdown article

    Returns:
        dict with 'status' ("success"/"error"), 'url', 'filename', and 'error'
    """
    try:
        if not GITHUB_TOKEN or not GITHUB_REPO:
            return {
                "status": "error",
                "error": "GitHub not configured",
                "url": None,
                "filename": None
            }

        g = Github(auth=github.Auth.Token(GITHUB_TOKEN))
        repo = g.get_repo(GITHUB_REPO)

        # Derive the filename slug from the first '# ' heading; fall back to
        # a time-based name when the article has no title.
        date_str = datetime.now().strftime("%Y-%m-%d")
        lines = article_content.split('\n')
        title_line = next((l for l in lines if l.startswith('# ')), None)

        if title_line:
            slug = slugify(title_line[2:].strip()[:50])
        else:
            slug = f"interview-{datetime.now().strftime('%H%M%S')}"

        filename = f"_draft/{date_str}-{slug}.md"

        # Jekyll-style front matter so the repo can identify chatbot drafts.
        front_matter = f"""---
date: {date_str}
status: draft
source: interview-chatbot
---
"""
        full_content = front_matter + article_content

        repo.create_file(
            path=filename,
            message=f"Add draft article: {slug}",
            content=full_content,
            branch=GITHUB_BRANCH
        )

        # FIX: the URL previously ended in a literal "(unknown)" placeholder;
        # link to the file that was actually created.
        github_url = f"https://github.com/{GITHUB_REPO}/blob/{GITHUB_BRANCH}/{filename}"

        return {
            "status": "success",
            "url": github_url,
            "filename": filename,
            "error": None
        }

    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "url": None,
            "filename": None
        }
|
|
|
|
def get_initial_greeting() -> str:
    """Expose the static opening message for the frontend to display."""
    return INITIAL_GREETING
|
|
|
|
| |
# Gradio app: a documentation/test UI plus hidden components whose click
# handlers are what actually expose the named API endpoints.
with gr.Blocks(title="Interview Chatbot API") as demo:
    gr.Markdown("# Interview Chatbot Backend API")
    gr.Markdown("This is a stateless backend with **automatic context management**.")
    gr.Markdown("Long interviews are automatically summarized to stay within token limits.")

    with gr.Tab("API Documentation"):
        # Double braces ({{...}}) survive .format() as literal braces.
        gr.Markdown("""
    ## Available Endpoints

    ### POST /api/chat
    **Input:** `[history, user_message]`
    - `history`: Array of message objects `[{{role, content}}, ...]`
    - `user_message`: String

    **Output:** `{{history, error}}`

    **Context Management:** Automatically creates summaries when approaching token limits

    ### POST /api/generate_article
    **Input:** `[history]`
    - `history`: Array of message objects

    **Output:** `{{article, error}}`

    ### POST /api/submit_article
    **Input:** `[article_content]`
    - `article_content`: Markdown string

    **Output:** `{{status, url, filename, error}}`

    ### GET /api/get_initial_greeting
    **Output:** Initial greeting string

    ## Settings

    - **Max Context:** {MAX_CONTEXT_TOKENS:,} tokens
    - **Recent Messages Kept:** {KEEP_RECENT_MESSAGES} (last exchanges preserved)
    - **Article Generator:** Gemini 2.5 Pro (2M token context - no limit)
    """.format(MAX_CONTEXT_TOKENS=MAX_CONTEXT_TOKENS, KEEP_RECENT_MESSAGES=KEEP_RECENT_MESSAGES))

    # Interactive playground for manually exercising the chat endpoint.
    with gr.Tab("Test Interface"):
        with gr.Row():
            test_history = gr.JSON(label="History", value=[])
            test_message = gr.Textbox(label="User Message", placeholder="Type a message...")

        test_chat_btn = gr.Button("Test Chat")
        test_output = gr.JSON(label="Response")

        test_chat_btn.click(
            fn=chat,
            inputs=[test_history, test_message],
            outputs=[test_output]
        )

    # Hidden components: invisible in the UI, but each api_name below
    # registers a callable /api/<name> endpoint for the HTML frontend.
    with gr.Row(visible=False):

        chat_history_input = gr.JSON()
        chat_message_input = gr.Textbox()
        chat_output = gr.JSON()
        chat_btn = gr.Button("Chat")
        chat_btn.click(
            fn=chat,
            inputs=[chat_history_input, chat_message_input],
            outputs=[chat_output],
            api_name="chat"
        )

        gen_history_input = gr.JSON()
        gen_output = gr.JSON()
        gen_btn = gr.Button("Generate")
        gen_btn.click(
            fn=generate_article,
            inputs=[gen_history_input],
            outputs=[gen_output],
            api_name="generate_article"
        )

        submit_input = gr.Textbox()
        submit_output = gr.JSON()
        submit_btn_api = gr.Button("Submit")
        submit_btn_api.click(
            fn=submit_article,
            inputs=[submit_input],
            outputs=[submit_output],
            api_name="submit_article"
        )

        greeting_output = gr.Textbox()
        greeting_btn = gr.Button("Greeting")
        greeting_btn.click(
            fn=get_initial_greeting,
            inputs=[],
            outputs=[greeting_output],
            api_name="get_initial_greeting"
        )


if __name__ == "__main__":
    # Bind to all interfaces (container-friendly) on the default Gradio port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )