from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langserve import add_routes
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFacePipeline
import torch
import transformers

# Reduce logs
transformers.logging.set_verbosity_error()

# Create FastAPI app
app = FastAPI(
    title="Phi-3 Code Explainer API",
    description="Explains Python code using the Phi-3 Mini model and LangChain.",
    version="1.0",
)

# CORS settings
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load model/tokenizer safely for CPU-only Spaces
# (transformers >= 4.41 supports Phi-3 natively; older versions need trust_remote_code=True)
model_id = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Use float16 on GPU; fall back to float32 on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)

# Generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    return_full_text=False,
    do_sample=True,
    temperature=0.7,
)

# LangChain pipeline
llm = HuggingFacePipeline(pipeline=generator)

# PromptTemplate (rather than ChatPromptTemplate) passes the raw string to the
# text-generation pipeline without injecting a "Human:" role prefix.
prompt = PromptTemplate.from_template(
    """Instruction: Explain this Python code step-by-step:

{code}

Explanation:"""
)

parser = StrOutputParser()
chain = prompt | llm | parser

# Add route
add_routes(app, chain, path="/explain")

# Run server (necessary for Hugging Face Spaces)
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=7860)
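# --- Example client call (a sketch, not part of the app) ---
# LangServe mounts the chain's runnable endpoints under the given path, so a
# client can POST to /explain/invoke with the prompt variables under "input".
# The snippet below is a minimal smoke test using the `requests` library; the
# sample code string is an arbitrary placeholder.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/explain/invoke",
#       json={"input": {"code": "def add(a, b):\n    return a + b"}},
#   )
#   print(resp.json()["output"])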